Re: [PATCH] Optimize _Float16 usage for non AVX512FP16.

2021-11-28 Thread Uros Bizjak via Gcc-patches
On Mon, Nov 29, 2021 at 8:46 AM liuhongt  wrote:
>
> As discussed in the PR, this patch does the following optimizations:
> 1. No memory is needed to move HI/HFmode between GPR and SSE registers
> under TARGET_SSE2 and above, pinsrw/pextrw are used for them w/o
> AVX512FP16.
> 2. Use gen_sse2_pinsrph/gen_vec_setv4sf_0 to replace
> ix86_expand_vector_set in extendhfsf2/truncsfhf2 so that redundant
> initialization could be eliminated.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and
> x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake}
> Ok for trunk?
>
> gcc/ChangeLog:
>
> PR target/102811
> * config/i386/i386.c (inline_secondary_memory_needed): HImode
> move between GPR and SSE registers is supported under
> TARGET_SSE2 and above.
> * config/i386/i386.md (extendhfsf2): Optimize expander.
> (truncsfhf2): Ditto.
> * config/i386/sse.md (sse2p4_1): Adjust attr for V8HFmode to
> align with V8HImode.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr102811-2.c: New test.
> * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: Add new
> scan-assembler-times.
> ---
>  gcc/config/i386/i386.c|  5 +++--
>  gcc/config/i386/i386.md   | 18 +++
>  gcc/config/i386/sse.md|  2 +-
>  .../i386/avx512vl-vcvtps2ph-pr102811.c|  2 +-
>  gcc/testsuite/gcc.target/i386/pr102811-2.c| 22 +++
>  5 files changed, 41 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102811-2.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 7cf599f57f7..2657e7817ae 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19437,8 +19437,9 @@ inline_secondary_memory_needed (machine_mode mode, 
> reg_class_t class1,
>if (msize > UNITS_PER_WORD)
> return true;
>
> -  /* In addition to SImode moves, AVX512FP16 also enables HImode moves.  
> */
> -  int minsize = GET_MODE_SIZE (TARGET_AVX512FP16 ? HImode : SImode);
> +  /* In addition to SImode moves, HImode moves are supported for SSE2 
> and above,
> +Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
> +  int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
>
>if (msize < minsize)
> return true;
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 2cb3e727588..070758edb66 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -4617,9 +4617,18 @@ (define_expand "extendhfsf2"
>if (!TARGET_AVX512FP16)
>  {
>rtx res = gen_reg_rtx (V4SFmode);
> -  rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
> +  rtx tmp = gen_reg_rtx (V8HFmode);
> +  rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
>
> -  ix86_expand_vector_set (false, tmp, operands[1], 0);
> +  if (TARGET_AVX2)
> +   {
> + rtx dup = gen_reg_rtx (V8HFmode);
> + emit_move_insn (dup, gen_rtx_VEC_DUPLICATE (V8HFmode, operands[1]));
> + emit_move_insn (tmp, gen_rtx_VEC_MERGE (V8HFmode, dup,
> + zero, const1_rtx));
> +   }
> +  else
> +   emit_insn (gen_sse2_pinsrph (tmp, zero, operands[1], const1_rtx));
>emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
>emit_move_insn (operands[0], gen_lowpart (SFmode, res));
>DONE;
> @@ -4833,9 +4842,10 @@ (define_expand "truncsfhf2"
>  if (!TARGET_AVX512FP16)
>  {
>rtx res = gen_reg_rtx (V8HFmode);
> -  rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
> +  rtx tmp = gen_reg_rtx (V4SFmode);
> +  rtx zero = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
>
> -  ix86_expand_vector_set (false, tmp, operands[1], 0);
> +  emit_insn (gen_vec_setv4sf_0 (tmp, zero, operands[1]));
>emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT 
> (4)));
>emit_move_insn (operands[0], gen_lowpart (HFmode, res));
>DONE;
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 5229b23af98..b371b140eb1 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -17272,7 +17272,7 @@ (define_mode_iterator PINSR_MODE
> (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
>
>  (define_mode_attr sse2p4_1
> -  [(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse4_1")
> +  [(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse2")
> (V4SI "sse4_1") (V2DI "sse4_1")])
>
>  (define_mode_attr pinsr_evex_isa
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c 
> b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> index dfbfb167953..9a6c432c866 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
> 

Re: [PATCH] Fix regression introduced by r12-5536.

2021-11-28 Thread Uros Bizjak via Gcc-patches
On Mon, Nov 29, 2021 at 2:32 AM liuhongt  wrote:
>
> There're several failures reported in [1]:
> 1.  unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
> %vpextrw should be used in output templates.
> 2. ICE in get_attr_memory for movhi_internal since some alternatives
> are marked as TYPE_SSELOG.
> Explicitly set memory_attr for those alternatives.
>
> Also this patch fixes a typo and some latent bugs which are related to
> moving HImode from/to sse register w/o TARGET_AVX512FP16.
>
> For optimization issues discussed in PR102811, I'll create another patch for
> it.
> [1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html
>
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and
> x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake}
> Ok for trunk?
>
> gcc/ChangeLog:
>
> * config/i386/i386.c (ix86_secondary_reload): Without
> TARGET_SSE4_1, General register is needed to move HImode from
> sse register to memory.
> * config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of
> pextrw in output templates.
> * config/i386/i386.md (movhi_internal): Ditto, also fix typo of
> MEM_P (operands[1]) and adjust memory/mode/prefix/type
> attribute for alternatives related to sse register.

OK, but please use sselog1 type instead so you don't need to introduce
the memory attribute.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.c  |  2 +-
>  gcc/config/i386/i386.md | 44 ++---
>  gcc/config/i386/sse.md  |  6 +++---
>  3 files changed, 36 insertions(+), 16 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 3dedf522c42..7cf599f57f7 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t 
> rclass,
>  }
>
>/* Require movement to gpr, and then store to memory.  */
> -  if (mode == HFmode
> +  if ((mode == HFmode || mode == HImode)
>&& !TARGET_SSE4_1
>&& SSE_CLASS_P (rclass)
>&& !in_p && MEM_P (x))
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 68606e57e60..2cb3e727588 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal"
>  case TYPE_SSELOG:
>if (SSE_REG_P (operands[0]))
> return MEM_P (operands[1])
> - ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> - : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> + ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
> + : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
>else
> -   return MEM_P (operands[1])
> - ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> - : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +   return MEM_P (operands[0])
> + ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
> + : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
>
>  case TYPE_MSKLOG:
>if (operands[1] == const0_rtx)
> @@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal"
>]
>(const_string "*")))
> (set (attr "type")
> - (cond [(eq_attr "alternative" "9,10,11,12,13")
> + (cond [(eq_attr "alternative" "9,10,12,13")
>   (if_then_else (match_test "TARGET_AVX512FP16")
> (const_string "ssemov")
> (const_string "sselog"))
> (eq_attr "alternative" "4,5,6,7")
>   (const_string "mskmov")
> +   (eq_attr "alternative" "11")
> + (const_string "ssemov")
> (eq_attr "alternative" "8")
>   (const_string "msklog")
> (match_test "optimize_function_for_size_p (cfun)")
> @@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal"
>   (const_string "imovx")
>]
>(const_string "imov")))
> +(set (attr "memory")
> +(cond [(eq_attr "alternative" "9,10")
> + (const_string "none")
> +   (eq_attr "alternative" "12")
> + (const_string "load")
> +   (eq_attr "alternative" "13")
> + (const_string "store")
> +   ]
> +   (const_string "*")))

Please use sselog1 type instead, and the memory attribute will be
calculated correctly.

>  (set (attr "prefix")
> -  (if_then_else (eq_attr "alternative" "4,5,6,7,8")
> -   (const_string "vex")
> -   (const_string "orig")))
> +(cond [(eq_attr "alternative" "9,10,11,12,13")
> + (const_string "maybe_evex")
> +   (eq_attr "alternative" "4,5,6,7,8")
> + (const_string "vex")
> +  ]
> +  (const_string "orig")))
>  (set (attr "mode")
>(cond [(eq_attr "type" "imovx")
>(const_string "SI")
> +(eq_attr "alternative" "9,10,12,13")
> +  (if_then_else (match_test "TARGET_AVX512FP16")
> +(const_string "HI")
> + 

[PATCH] Optimize _Float16 usage for non AVX512FP16.

2021-11-28 Thread liuhongt via Gcc-patches
As discussed in the PR, this patch does the following optimizations:
1. No memory is needed to move HI/HFmode between GPR and SSE registers
under TARGET_SSE2 and above, pinsrw/pextrw are used for them w/o
AVX512FP16.
2. Use gen_sse2_pinsrph/gen_vec_setv4sf_0 to replace
ix86_expand_vector_set in extendhfsf2/truncsfhf2 so that redundant
initialization could be eliminated.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and
x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake}
Ok for trunk?

gcc/ChangeLog:

PR target/102811
* config/i386/i386.c (inline_secondary_memory_needed): HImode
move between GPR and SSE registers is supported under
TARGET_SSE2 and above.
* config/i386/i386.md (extendhfsf2): Optimize expander.
(truncsfhf2): Ditto.
* config/i386/sse.md (sse2p4_1): Adjust attr for V8HFmode to
align with V8HImode.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr102811-2.c: New test.
* gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: Add new
scan-assembler-times.
---
 gcc/config/i386/i386.c|  5 +++--
 gcc/config/i386/i386.md   | 18 +++
 gcc/config/i386/sse.md|  2 +-
 .../i386/avx512vl-vcvtps2ph-pr102811.c|  2 +-
 gcc/testsuite/gcc.target/i386/pr102811-2.c| 22 +++
 5 files changed, 41 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102811-2.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7cf599f57f7..2657e7817ae 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19437,8 +19437,9 @@ inline_secondary_memory_needed (machine_mode mode, 
reg_class_t class1,
   if (msize > UNITS_PER_WORD)
return true;
 
-  /* In addition to SImode moves, AVX512FP16 also enables HImode moves.  */
-  int minsize = GET_MODE_SIZE (TARGET_AVX512FP16 ? HImode : SImode);
+  /* In addition to SImode moves, HImode moves are supported for SSE2 and 
above,
+Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
+  int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
 
   if (msize < minsize)
return true;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 2cb3e727588..070758edb66 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4617,9 +4617,18 @@ (define_expand "extendhfsf2"
   if (!TARGET_AVX512FP16)
 {
   rtx res = gen_reg_rtx (V4SFmode);
-  rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
+  rtx tmp = gen_reg_rtx (V8HFmode);
+  rtx zero = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
 
-  ix86_expand_vector_set (false, tmp, operands[1], 0);
+  if (TARGET_AVX2)
+   {
+ rtx dup = gen_reg_rtx (V8HFmode);
+ emit_move_insn (dup, gen_rtx_VEC_DUPLICATE (V8HFmode, operands[1]));
+ emit_move_insn (tmp, gen_rtx_VEC_MERGE (V8HFmode, dup,
+ zero, const1_rtx));
+   }
+  else
+   emit_insn (gen_sse2_pinsrph (tmp, zero, operands[1], const1_rtx));
   emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
   emit_move_insn (operands[0], gen_lowpart (SFmode, res));
   DONE;
@@ -4833,9 +4842,10 @@ (define_expand "truncsfhf2"
 if (!TARGET_AVX512FP16)
 {
   rtx res = gen_reg_rtx (V8HFmode);
-  rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
+  rtx tmp = gen_reg_rtx (V4SFmode);
+  rtx zero = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
 
-  ix86_expand_vector_set (false, tmp, operands[1], 0);
+  emit_insn (gen_vec_setv4sf_0 (tmp, zero, operands[1]));
   emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT 
(4)));
   emit_move_insn (operands[0], gen_lowpart (HFmode, res));
   DONE;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5229b23af98..b371b140eb1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17272,7 +17272,7 @@ (define_mode_iterator PINSR_MODE
(V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
 
 (define_mode_attr sse2p4_1
-  [(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse4_1")
+  [(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse2")
(V4SI "sse4_1") (V2DI "sse4_1")])
 
 (define_mode_attr pinsr_evex_isa
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c 
b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
index dfbfb167953..9a6c432c866 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
-/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 1 } } */
 /* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
 /* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
 /* { 

[Bug target/103463] [12 Regression] ICE: in ix86_attr_length_immediate_default, at config/i386/i386.c:16686 with -Os -fno-tree-dominator-opts -fno-tree-vrp

2021-11-28 Thread crazylht at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103463

Hongtao.liu  changed:

   What|Removed |Added

 CC||crazylht at gmail dot com

--- Comment #1 from Hongtao.liu  ---
It should be fixed by
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/585613.html

[Bug tree-optimization/103458] [12 Regression] ICE in verify_loop_structure, at cfgloop.c:1736 (error: loop with header 4 not in loop tree)

2021-11-28 Thread rguenth at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103458

Richard Biener  changed:

   What|Removed |Added

   Assignee|unassigned at gcc dot gnu.org  |rguenth at gcc dot 
gnu.org
 Status|NEW |ASSIGNED

--- Comment #2 from Richard Biener  ---
I will have a look.

[PATCH]middle-end cse: Make sure duplicate elements are not entered into the equivalence set [PR103404]

2021-11-28 Thread Tamar Christina via Gcc-patches
Hi All,

CSE uses equivalence classes to keep track of expressions that all have the same
values at the current point in the program.

Normal equivalences through SETs only insert and perform lookups in this set but
equivalence determined from comparisons, e.g.

(insn 46 44 47 7 (set (reg:CCZ 17 flags)
(compare:CCZ (reg:SI 105 [ iD.2893 ])
(const_int 0 [0]))) "cse.c":18:22 7 {*cmpsi_ccno_1}
 (expr_list:REG_DEAD (reg:SI 105 [ iD.2893 ])
(nil)))

creates the equivalence EQ on (reg:SI 105 [ iD.2893 ]) and (const_int 0 [0]).

This causes a merge to happen between the two equivalence sets denoted by
(const_int 0 [0]) and (reg:SI 105 [ iD.2893 ]) respectively.

The operation happens through merge_equiv_classes; however, this function has an
invariant that the classes to be merged must not contain any duplicates.  This is
because it frees entries before merging.

The given testcase, when using the supplied flags, triggers an ICE due to the
equivalence set being

(rr) p dump_class (class1)
Equivalence chain for (reg:SI 105 [ iD.2893 ]):
(reg:SI 105 [ iD.2893 ])
$3 = void

(rr) p dump_class (class2)
Equivalence chain for (const_int 0 [0]):
(const_int 0 [0])
(reg:SI 97 [ _10 ])
(reg:SI 97 [ _10 ])
$4 = void

This happens because the original INSN being recorded is

(insn 18 17 24 2 (set (subreg:V1SI (reg:SI 97 [ _10 ]) 0)
(const_vector:V1SI [
(const_int 0 [0])
])) "cse.c":11:9 1363 {*movv1si_internal}
 (expr_list:REG_UNUSED (reg:SI 97 [ _10 ])
(nil)))

and we end up generating two equivalences.  The first one is simply that
reg:SI 97 is 0.  The second one is that 0 can be extracted from the V1SI, so
subreg (subreg:V1SI (reg:SI 97) 0) 0 == 0.  This nested subreg gets folded away
to just reg:SI 97 and we re-insert the same equivalence.

This patch changes it so that once we figure out the bucket to insert into we
check if the equivalence set already contains the entry and if so just return
the existing entry and exit.

Bootstrapped and regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.


Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR rtl-optimization/103404
* cse.c (insert_with_costs): Check if item exists already before adding
a new entry in the equivalence class.

gcc/testsuite/ChangeLog:

PR rtl-optimization/103404
* gcc.target/i386/pr103404.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/cse.c b/gcc/cse.c
index 
c1c7d0ca27b73c4b944b4719f95fece74e0358d5..08295246c594109e947276051c6776e4cabca4ec
 100644
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -1537,6 +1537,17 @@ insert_with_costs (rtx x, struct table_elt *classp, 
unsigned int hash,
   if (REG_P (x) && REGNO (x) < FIRST_PSEUDO_REGISTER)
 add_to_hard_reg_set (_regs_in_table, GET_MODE (x), REGNO (x));
 
+  /* We cannot allow a duplicate to be entered into the equivalence sets
+ and so we should perform a check before we do any allocations or
+ change the buckets.  */
+  if (classp)
+{
+  struct table_elt *p;
+  for (p = classp; p; p = p->next_same_value)
+   if (exp_equiv_p (p->exp, x, 1, false))
+ return p;
+}
+
   /* Put an element for X into the right hash bucket.  */
 
   elt = free_element_chain;
diff --git a/gcc/testsuite/gcc.target/i386/pr103404.c 
b/gcc/testsuite/gcc.target/i386/pr103404.c
new file mode 100644
index 
..66f33645301db09503fc0977fd0f061a19e56ea5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103404.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Og -fcse-follow-jumps -fno-dce 
-fno-early-inlining -fgcse -fharden-conditional-branches -frerun-cse-after-loop 
-fno-tree-ccp -mavx5124fmaps -std=c99 -w" } */
+
+typedef unsigned __attribute__((__vector_size__ (4))) U;
+typedef unsigned __attribute__((__vector_size__ (16))) V;
+typedef unsigned __attribute__((__vector_size__ (64))) W;
+
+int x, y;
+
+V v;
+W w;
+
+inline
+int bar (U a)
+{
+  a |= x;
+  W k =
+__builtin_shufflevector (v, 5 / a,
+2, 4, 0, 2, 4, 1, 0, 1,
+1, 2, 1, 3, 0, 4, 4, 0);
+  w = k;
+  y = 0;
+}
+
+int
+foo ()
+{
+  bar ((U){0x});
+  for (unsigned i; i < sizeof (foo);)
+;
+}
+


-- 
diff --git a/gcc/cse.c b/gcc/cse.c
index c1c7d0ca27b73c4b944b4719f95fece74e0358d5..08295246c594109e947276051c6776e4cabca4ec 100644
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -1537,6 +1537,17 @@ insert_with_costs (rtx x, struct table_elt *classp, unsigned int hash,
   if (REG_P (x) && REGNO (x) < FIRST_PSEUDO_REGISTER)
 add_to_hard_reg_set (_regs_in_table, GET_MODE (x), REGNO (x));
 
+  /* We cannot allow a duplicate to be entered into the equivalence sets
+ and so we should perform a check before we do any allocations or
+ change the buckets.  */
+  if (classp)
+{
+  struct table_elt *p;
+  for (p = classp; p; p = p->next_same_value)
+	if (exp_equiv_p (p->exp, x, 1, 

[Bug target/103463] [12 Regression] ICE: in ix86_attr_length_immediate_default, at config/i386/i386.c:16686 with -Os -fno-tree-dominator-opts -fno-tree-vrp

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103463

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |12.0

[Bug ipa/103451] [12 Regression] crash at gcc/range-op.cc:1836 since r12-5531-g1b0acc4b800b589a

2021-11-28 Thread rguenth at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103451

Richard Biener  changed:

   What|Removed |Added

 CC||aldyh at gcc dot gnu.org,
   ||amacleod at redhat dot com,
   ||rguenth at gcc dot gnu.org

--- Comment #2 from Richard Biener  ---
So range-op.cc eventually wants to look at 'cfun' which of course is a no-go
in IPA context.

void
operator_div::wi_fold (irange , tree type,
   const wide_int _lb, const wide_int _ub,
   const wide_int _lb, const wide_int _ub) const
{
...
  // If flag_non_call_exceptions, we must not eliminate a division by zero.
  if (cfun->can_throw_non_call_exceptions)
{
  r.set_varying (type);
  return;

I'm not sure wi_fold should care about "eliminating a division", but surely
even for non-call EH the actual range of the result doesn't need to care.

So if sth goes wrong when eliding the above it needs to be fixed upthread
instead?

Otherwise a "quick" workaround for the ICE is to check !cfun || ... and
be conservative.  I see there's no state associated with range_fold_binary_expr
where the IPA context could pass down relevant can_throw_non_call_exceptions.

I also see

bool
fold_using_range::range_of_builtin_call (irange , gcall *call,
 fur_source )
...
  if (cfun->after_inlining)
{
  r.set_zero (type);

which might have similar problems (!cfun || ... looks quite reasonable there)

[Bug target/61713] ICE when building c++ code with atomic functions for thumb1 target

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61713

Andrew Pinski  changed:

   What|Removed |Added

 CC||mans at mansr dot com

--- Comment #9 from Andrew Pinski  ---
*** Bug 56964 has been marked as a duplicate of this bug. ***

[Bug middle-end/56964] ICE with -fno-sync-libcalls when target lacks atomic operations

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56964

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |DUPLICATE
 Status|UNCONFIRMED |RESOLVED

--- Comment #3 from Andrew Pinski  ---
Fixed in GCC 4.9.x and GCC 5+ by the patch which fixed PR 61713. It is an
exact dup really.

*** This bug has been marked as a duplicate of bug 61713 ***

[Bug target/103463] New: [12 Regression] ICE: in ix86_attr_length_immediate_default, at config/i386/i386.c:16686 with -Os -fno-tree-dominator-opts -fno-tree-vrp

2021-11-28 Thread zsojka at seznam dot cz via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103463

Bug ID: 103463
   Summary: [12 Regression] ICE: in
ix86_attr_length_immediate_default, at
config/i386/i386.c:16686 with -Os
-fno-tree-dominator-opts -fno-tree-vrp
   Product: gcc
   Version: 12.0
Status: UNCONFIRMED
  Keywords: ice-on-valid-code
  Severity: normal
  Priority: P3
 Component: target
  Assignee: unassigned at gcc dot gnu.org
  Reporter: zsojka at seznam dot cz
  Target Milestone: ---
  Host: x86_64-pc-linux-gnu
Target: x86_64-pc-linux-gnu

Created attachment 51892
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=51892&action=edit
reduced testcase

Compiler output:
$ x86_64-pc-linux-gnu-gcc -Os -fno-tree-dominator-opts -fno-tree-vrp testcase.c
during RTL pass: sched2
testcase.c: In function 'bar0':
testcase.c:10:1: internal compiler error: in
ix86_attr_length_immediate_default, at config/i386/i386.c:16686
   10 | }
  | ^
0x814081 ix86_attr_length_immediate_default(rtx_insn*, bool)
/repo/gcc-trunk/gcc/config/i386/i386.c:16686
0x1aeb948 insn_default_length(rtx_insn*)
/repo/gcc-trunk/gcc/config/i386/i386.md:720
0x16dcd92 ix86_min_insn_size(rtx_insn*)
/repo/gcc-trunk/gcc/config/i386/i386.c:21516
0x1762631 core2i7_first_cycle_multipass_filter_ready_try
/repo/gcc-trunk/gcc/config/i386/x86-tune-sched-core.c:107
0x22e199f max_issue(ready_list*, int, void*, bool, int*)
/repo/gcc-trunk/gcc/haifa-sched.c:5952
0x22e47a7 max_issue(ready_list*, int, void*, bool, int*)
/repo/gcc-trunk/gcc/haifa-sched.c:6189
0x22e47a7 choose_ready
/repo/gcc-trunk/gcc/haifa-sched.c:6189
0x22f2532 schedule_block(basic_block_def**, void*)
/repo/gcc-trunk/gcc/haifa-sched.c:6806
0x12fb8cd schedule_region
/repo/gcc-trunk/gcc/sched-rgn.c:3179
0x12fb8cd schedule_insns()
/repo/gcc-trunk/gcc/sched-rgn.c:3518
0x12fbded schedule_insns()
/repo/gcc-trunk/gcc/sched-rgn.c:3504
0x12fbded rest_of_handle_sched2
/repo/gcc-trunk/gcc/sched-rgn.c:3742
0x12fbded execute
/repo/gcc-trunk/gcc/sched-rgn.c:3878
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.

$ x86_64-pc-linux-gnu-gcc -v
Using built-in specs.
COLLECT_GCC=/repo/gcc-trunk/binary-latest/bin/x86_64-pc-linux-gnu-gcc
COLLECT_LTO_WRAPPER=/repo/gcc-trunk/binary-trunk-r12-5569-20211128195959-g300dbea1269-checking-yes-rtl-df-extra-nobootstrap-amd64/bin/../libexec/gcc/x86_64-pc-linux-gnu/12.0.0/lto-wrapper
Target: x86_64-pc-linux-gnu
Configured with: /repo/gcc-trunk//configure --enable-languages=c,c++
--enable-valgrind-annotations --disable-nls --enable-checking=yes,rtl,df,extra
--disable-bootstrap --with-cloog --with-ppl --with-isl
--build=x86_64-pc-linux-gnu --host=x86_64-pc-linux-gnu
--target=x86_64-pc-linux-gnu --with-ld=/usr/bin/x86_64-pc-linux-gnu-ld
--with-as=/usr/bin/x86_64-pc-linux-gnu-as --disable-libstdcxx-pch
--prefix=/repo/gcc-trunk//binary-trunk-r12-5569-20211128195959-g300dbea1269-checking-yes-rtl-df-extra-nobootstrap-amd64
Thread model: posix
Supported LTO compression algorithms: zlib zstd
gcc version 12.0.0 20211129 (experimental) (GCC)

[Bug c++/46476] Missing Warning about unreachable code after return [-Wunreachable-code-return]

2021-11-28 Thread rguenth at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46476
Bug 46476 depends on bug 103439, which changed state.

Bug 103439 Summary: genemit emits dead code
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103439

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |WONTFIX

[Bug middle-end/103439] genemit emits dead code

2021-11-28 Thread rguenth at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103439

Richard Biener  changed:

   What|Removed |Added

 Resolution|--- |WONTFIX
 Status|UNCONFIRMED |RESOLVED

--- Comment #6 from Richard Biener  ---
OK, agreed.

[Bug rtl-optimization/60412] superfluous arithmetic generated for uneven tail handling

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60412

Andrew Pinski  changed:

   What|Removed |Added

 CC||l_belev at yahoo dot com

--- Comment #2 from Andrew Pinski  ---
*** Bug 70274 has been marked as a duplicate of this bug. ***

[Bug tree-optimization/70274] optimization goes astray and adds completely redundant code

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70274

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |DUPLICATE
 Status|NEW |RESOLVED

--- Comment #2 from Andrew Pinski  ---
Dup of bug 60412.

*** This bug has been marked as a duplicate of bug 60412 ***

[Bug rtl-optimization/60412] superfluous arithmetic generated for uneven tail handling

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60412

Andrew Pinski  changed:

   What|Removed |Added

 Ever confirmed|0   |1
   Severity|normal  |enhancement
 Status|UNCONFIRMED |NEW
   Last reconfirmed||2021-11-29
   Keywords||missed-optimization

--- Comment #1 from Andrew Pinski  ---
Confirmed. This is sccp happening, but it is not needed.

[Bug rtl-optimization/98782] [11/12 Regression] Bad interaction between IPA frequences and IRA resulting in spills due to changes in BB frequencies

2021-11-28 Thread tnfchris at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98782

--- Comment #8 from Tamar Christina  ---
> 
> I wonder how the situation looks on AArch64?

The situation didn't improve.  Up until the end of stage-1 we were seeing a 6%
perf uplift from somewhere which seems to have gone away now (in a commit range
with a non IPA related patch).

The major problem is still the spills. After talking to Vlad I took a look at
improving the Chaitin-Briggs heuristics for spilling in the presence of
calls and how it tries to improve the allocation by moving spills along the
call graph.

By improving on these heuristics I was able to reduce the number of spills and
saw improvements on both x86 and AArch64 which brought us back to the old
numbers.

However this same information is used by other areas such as register
preferences and so I had a regression in shrink wrapping.  There's also an
issue where x86 seems to assign negative values to register costs to indicate
they REALLY want this register.  This seems to work because the penalty applied
usually is large and it cancels out the negative cost.  But now the value stays
negative causing the register to not be used instead.

To fix these I need to keep track of the penalties and the costs separately but
did not get time to finish that work before the end of stage-1.

[Bug middle-end/46143] __attribute__((optimize)) emits wrong code

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46143
Bug 46143 depends on bug 37565, which changed state.

Bug 37565 Summary: __optimize__  attribute doesn't work correctly
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug target/52555] [4.6/4.7/4.8 Regression] ICE unrecognizable insn with -ffast-math and __attribute__((optimize(xx)))

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52555
Bug 52555 depends on bug 37565, which changed state.

Bug 37565 Summary: __optimize__  attribute doesn't work correctly
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug target/47315] ICE: in extract_insn, at recog.c:2109 (unrecognizable insn) with -mvzeroupper and __attribute__((target("avx")))

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47315
Bug 47315 depends on bug 37565, which changed state.

Bug 37565 Summary: __optimize__  attribute doesn't work correctly
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug target/45478] __attribute__((__target__())) causes crashes at various places

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=45478
Bug 45478 depends on bug 37565, which changed state.

Bug 37565 Summary: __optimize__  attribute doesn't work correctly
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug target/45325] [4.9 Regression] target attribute doesn't work with -march=i586

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=45325
Bug 45325 depends on bug 37565, which changed state.

Bug 37565 Summary: __optimize__  attribute doesn't work correctly
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug c/41206] Segmentation fault from two "#pragma GCC optimize" lines

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=41206
Bug 41206 depends on bug 37565, which changed state.

Bug 37565 Summary: __optimize__  attribute doesn't work correctly
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug target/38018] gcc.dg/pr37106-1.c doesn't work

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=38018
Bug 38018 depends on bug 37565, which changed state.

Bug 37565 Summary: __optimize__  attribute doesn't work correctly
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug target/37394] [4.4 Regression] Segfault in ia64_variable_issue with -O -fschedule-insns2

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37394
Bug 37394 depends on bug 37565, which changed state.

Bug 37565 Summary: __optimize__  attribute doesn't work correctly
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug middle-end/53776] pragma optimize does not support Os

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53776
Bug 53776 depends on bug 37565, which changed state.

Bug 37565 Summary: __optimize__  attribute doesn't work correctly
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug middle-end/39840] Non-optimal (or wrong) implementation of SSE intrinsics

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39840
Bug 39840 depends on bug 37565, which changed state.

Bug 37565 Summary: __optimize__  attribute doesn't work correctly
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug target/39787] ICE with #pragma GCC target

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=39787
Bug 39787 depends on bug 37565, which changed state.

Bug 37565 Summary: __optimize__  attribute doesn't work correctly
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

[Bug middle-end/37565] __optimize__ attribute doesn't work correctly

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37565

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #11 from Andrew Pinski  ---
Fixed in GCC 4.9.0 by r0-125571-gc7f36d55a63c3

https://gcc.gnu.org/pipermail/gcc-patches/2013-October/371339.html

[Bug target/102239] powerpc suboptimal boolean test of contiguous bits

2021-11-28 Thread luoxhu at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102239

--- Comment #7 from luoxhu at gcc dot gnu.org ---
 1| Dump of assembler code for function foo:
 2|0x15e0 <+0>: rldicr. r3,r3,29,1
 3+>   0x15e4 <+4>: beq 0x15f0 
 4|0x15e8 <+8>: blr
 5|0x15ec <+12>:ori r2,r2,0
 6|0x15f0 <+16>:blr
 7|0x15f4 <+20>:.long 0x0
 8|0x15f8 <+24>:.long 0x0

(gdb) si
0x15e4 in foo ()
1: /x $r3 = 0xc000000000000000
2: /x $cr = 0x82000282

cr0 is negative if only rotldi3_mask_dot, but it was 0x42000282 on master code.


BTW, clang also generated instructions with two rotates:

foo(long):# @foo(long)
rldicl 3, 3, 31, 33
rldicl. 3, 3, 33, 29
beq 0, .LBB0_2
blr
.LBB0_2:
blr
.long   0
.quad   0

[Bug tree-optimization/49946] Thread jumps confuse loop unrolling

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=49946

Andrew Pinski  changed:

   What|Removed |Added

   Keywords||needs-bisection
   Last reconfirmed||2021-11-29
  Component|middle-end  |tree-optimization
 Ever confirmed|0   |1
   Severity|normal  |enhancement
 Status|UNCONFIRMED |NEW

--- Comment #3 from Andrew Pinski  ---
Confirmed, though it looks like it was fixed on the trunk.
There have been a "few" jump threading patches which would have improved this
situation.

[Bug middle-end/87210] [RFE] introduce build time options to zero initialize automatic stack variables

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87210

Andrew Pinski  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
   Target Milestone|--- |12.0
 Resolution|--- |FIXED

--- Comment #9 from Andrew Pinski  ---
  -ftrivial-auto-var-init=[uninitialized|pattern|zero] Add initializations to
automatic variables.

[Bug middle-end/96159] atomic creates incorrect code for possible misaligned struct

2021-11-28 Thread muecker at gwdg dot de via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96159

Martin Uecker  changed:

   What|Removed |Added

Summary|atomic creates incorrect|atomic creates incorrect
   |code for possible isaligned |code for possible
   |struct  |misaligned struct

--- Comment #10 from Martin Uecker  ---
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87237

[Bug tree-optimization/19676] Loop optimizer fails to reverse simple loop

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19676

Andrew Pinski  changed:

   What|Removed |Added

   See Also||https://gcc.gnu.org/bugzill
   ||a/show_bug.cgi?id=31238,
   ||https://gcc.gnu.org/bugzill
   ||a/show_bug.cgi?id=40886
 Resolution|--- |FIXED
   Target Milestone|--- |4.6.0
 Status|NEW |RESOLVED

--- Comment #6 from Andrew Pinski  ---
Fixed a long time ago in GCC 4.6.0.

Everything except for testloop2 was fixed in GCC 4.5.0 (which was PR 40886 and PR
31238 ). I have not looked into what fixed it in GCC 4.6 though.

[Bug tree-optimization/103462] GCC failed to reduce bit clear in loop.

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103462

Andrew Pinski  changed:

   What|Removed |Added

   See Also||https://gcc.gnu.org/bugzill
   ||a/show_bug.cgi?id=101991

--- Comment #3 from Andrew Pinski  ---
Related to PR 101991 but not really the same. In this case the value that
matters is an induction variable while in the other case it was an invariant.

[Bug tree-optimization/103462] GCC failed to reduce bit clear in loop.

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103462

Andrew Pinski  changed:

   What|Removed |Added

   Keywords||missed-optimization
   Severity|normal  |enhancement

[Bug tree-optimization/103462] GCC failed to reduce bit clear in loop.

2021-11-28 Thread crazylht at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103462

--- Comment #2 from Hongtao.liu  ---
bit clear and induction variable could be simplified to `& CONSTANT`

[Bug tree-optimization/103462] GCC failed to reduce bit clear in loop.

2021-11-28 Thread crazylht at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103462

--- Comment #1 from Hongtao.liu  ---
Should it be done in vectorizer or ldist(just like memory op), or somewhere
else?

[Bug target/47769] [missed optimization] use of btr (bit test and reset)

2021-11-28 Thread crazylht at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47769

--- Comment #7 from Hongtao.liu  ---

> 
> This is obviously horrible, but the right answer isn't btr in a loop, it's
> what clang does:
> 
> movabsq $7905747460161236406, %rax # imm = 0x6DB6DB6DB6DB6DB6 every
> third bit unset
> andq%rdi, %rax
> retq
> 

Open pr103462 for this.

[Bug tree-optimization/103462] New: vectorizer failed to reduce bit_clear in loop.

2021-11-28 Thread crazylht at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103462

Bug ID: 103462
   Summary: vectorizer failed to reduce bit_clear in loop.
   Product: gcc
   Version: 12.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: crazylht at gmail dot com
  Target Milestone: ---
  Host: x86_64-pc-linux-gnu

the testcase is from pr47769

unsigned long cfunc_one(unsigned long tmp) {
for (unsigned long bit = 0; bit < 64; bit += 3) {
tmp &= ~(1UL << bit);
}
return tmp;
}

with -O3 -march=skylake -funroll-loops
gcc generates:
cfunc_one:
mov rax, rdi
xor edx, edx
.L2:
lea rcx, [rdx+3]
btr rax, rdx
lea rsi, [rdx+6]
btr rax, rcx
lea rdi, [rdx+9]
btr rax, rsi
btr rax, rdi
lea r8, [rdx+12]
lea r9, [rdx+15]
btr rax, r8
lea r10, [rdx+18]
btr rax, r9
lea r11, [rdx+21]
btr rax, r10
lea rcx, [rdx+24]
btr rax, r11
lea rsi, [rdx+27]
btr rax, rcx
lea rdi, [rdx+30]
btr rax, rsi
add rdx, 33
btr rax, rdi
cmp rdx, 66
jne .L2
ret

while clang generates:

cfunc_one(unsigned long):  # @cfunc_one(unsigned long)
movabs  rax, 7905747460161236406
and rax, rdi
ret

7905747460161236406 is bit clear for bit {0, 3, 6, 9, ..., 63}.

[Bug ipa/103461] [12 Regression] ICE in operator_div::wi_fold or in evaluate_conditions_for_known_args

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103461

--- Comment #2 from Andrew Pinski  ---
The first one is a dup of bug 103451.

[Bug ipa/103461] [12 Regression] ICE in operator_div::wi_fold or in evaluate_conditions_for_known_args

2021-11-28 Thread asolokha at gmx dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103461

Arseny Solokha  changed:

   What|Removed |Added

Summary|[12 Regression] ICE in  |[12 Regression] ICE in
   |operator_div::wi_fold   |operator_div::wi_fold or in
   ||evaluate_conditions_for_kno
   ||wn_args

--- Comment #1 from Arseny Solokha  ---
Another manifestation of the same issue:

unsigned char n;

void
foo (int a);

void
bar (void)
{
  foo (n + 1);
}

void
foo (int a)
{
  unsigned int x = 10;

  if (x * !a != 0)
bar ();
}

% gcc-12.0.0 -O2 --param early-inlining-insns=0 -c ljkqy0ae.c
during IPA pass: inline
ljkqy0ae.c:19:1: internal compiler error: in
evaluate_conditions_for_known_args, at ipa-fnsummary.c:516
   19 | }
  | ^
0x6d8bdb evaluate_conditions_for_known_args
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-fnsummary.c:516
0xc4732d do_estimate_edge_size(cgraph_edge*)
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline-analysis.c:330
0xc48aa7 estimate_edge_size
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline.h:79
0xc48aa7 estimate_edge_growth
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline.h:100
0xc48aa7 do_estimate_growth_1
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline-analysis.c:429
0xc48b2e cgraph_node::call_for_symbol_and_aliases(bool (*)(cgraph_node*,
void*), void*, bool)
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/cgraph.h:3411
0xc48b2e estimate_growth(cgraph_node*)
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline-analysis.c:467
0x1ca09ef inline_small_functions
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline.c:1980
0x1ca09ef ipa_inline
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline.c:2743
0x1ca09ef execute
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline.c:3142

[Bug ipa/103461] [12 Regression] ICE in operator_div::wi_fold

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103461

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |12.0

[Bug ipa/103461] New: [12 Regression] ICE in operator_div::wi_fold

2021-11-28 Thread asolokha at gmx dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103461

Bug ID: 103461
   Summary: [12 Regression] ICE in operator_div::wi_fold
   Product: gcc
   Version: 12.0
Status: UNCONFIRMED
  Keywords: ice-on-valid-code
  Severity: normal
  Priority: P3
 Component: ipa
  Assignee: unassigned at gcc dot gnu.org
  Reporter: asolokha at gmx dot com
CC: marxin at gcc dot gnu.org
  Target Milestone: ---

gcc-12.0.0-alpha20211128 snapshot (g:2899d49e3701a4df18a336a680a7095cc99a2229)
ICEs when compiling the following testcase w/ -O2:

int
baz (int c);

int
bar (int a, int b)
{
  return baz (a && (b / 0));
}

int
foo (int a, short int b)
{
  return bar (a, b);
}

% gcc-12.0.0 -O2 -w -c drnnxgu8.c
during IPA pass: inline
drnnxgu8.c:14:1: internal compiler error: Segmentation fault
   14 | }
  | ^
0xeaadef crash_signal
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/toplev.c:322
0x1d1e040 operator_div::wi_fold(irange&, tree_node*,
generic_wide_int const&, generic_wide_int
const&, generic_wide_int const&,
generic_wide_int const&) const
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/range-op.cc:1836
0x1d11cd7 range_operator::wi_fold_in_parts(irange&, tree_node*,
generic_wide_int const&, generic_wide_int
const&, generic_wide_int const&,
generic_wide_int const&) const
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/range-op.cc:192
0x1d12634 range_operator::fold_range(irange&, tree_node*, irange const&, irange
const&, tree_code) const
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/range-op.cc:214
0x116b949 range_fold_binary_expr(int_range<1u>*, tree_code, tree_node*,
int_range<1u> const*, int_range<1u> const*)
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/tree-vrp.c:1080
0xc2aad5 evaluate_conditions_for_known_args
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-fnsummary.c:511
0xc4732d do_estimate_edge_size(cgraph_edge*)
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline-analysis.c:330
0xc48aa7 estimate_edge_size
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline.h:79
0xc48aa7 estimate_edge_growth
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline.h:100
0xc48aa7 do_estimate_growth_1
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline-analysis.c:429
0xc48b2e cgraph_node::call_for_symbol_and_aliases(bool (*)(cgraph_node*,
void*), void*, bool)
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/cgraph.h:3411
0xc48b2e estimate_growth(cgraph_node*)
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline-analysis.c:467
0x1ca09ef inline_small_functions
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline.c:1980
0x1ca09ef ipa_inline
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline.c:2743
0x1ca09ef execute
   
/var/tmp/portage/sys-devel/gcc-12.0.0_p20211128/work/gcc-12-20211128/gcc/ipa-inline.c:3142

[Bug middle-end/60070] An option to disable all floating-pont

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60070

--- Comment #3 from Andrew Pinski  ---
For x86_64, the option was added with PR 70738.

I don't know if this would be a useful option that is general though. Each
target will implement it differently too.

[Bug target/61810] init-regs.c papers over issues elsewhere

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61810

--- Comment #6 from Andrew Pinski  ---
https://gcc.gnu.org/pipermail/gcc-patches/2021-August/577192.html

[Bug tree-optimization/64992] More optimize opportunity

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64992

Andrew Pinski  changed:

   What|Removed |Added

   Assignee|unassigned at gcc dot gnu.org  |pinskia at gcc dot 
gnu.org
 Status|NEW |ASSIGNED

--- Comment #8 from Andrew Pinski  ---
The missed optimization is:
int f(int c)
{
  unsigned t = c == 1;
  t <<= 1;
  return t == 0;
}

int f1(int c)
{
  unsigned t = c == 1;
  return ((int)t) <= 0;
}

That is (t << 1) == 0 should be converted into either:
(t & 0x7fffffff) or ((int)t) <= 0

Though the (semi more) general case is:
(for shift (lshift rshift)
 (for eqne (eq ne)
  (simplify
   (eqne (shift @0 INTEGER_CST@1) integer_zerop@2)
   (with
{
  mask = ...
}
(eqne (bit_and @0 { mask; }) @2)

And then the mask bit_and neeq to gtle

And then the rest will just work

[Bug target/102239] powerpc suboptimal boolean test of contiguous bits

2021-11-28 Thread luoxhu at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102239

--- Comment #6 from luoxhu at gcc dot gnu.org ---
(In reply to Segher Boessenkool from comment #5)
> (In reply to luoxhu from comment #4)
> > Simply adjust the sequence of dot instruction could produce expected code,
> > is this correct?
> 
> No it isn't.  Sorry.

Sorry I don't understand what is wrong...

> 
> > foo:
> > .LFB0:
> > .cfi_startproc
> > rldicr. 3,3,29,1
> > beq 0,.L2
> 
> This is fine, but only because it tests the EQ bit (not the LT or GT bits).
> So the generated RTL for this insn (the 2insn one) is not correct.

The generated RTL in pr102239.c.300r.split2 is:

(insn 32 8 33 2 (parallel [
(set (reg:CC 100 0 [123])
(compare:CC (and:DI (ashift:DI (reg:DI 3 3 [124])
(const_int 29 [0x1d]))
(const_int -4611686018427387904 [0xc000000000000000]))
(const_int 0 [0])))
(clobber (reg:DI 3 3 [125]))
]) "pr102239.c":4:6 238 {*rotldi3_mask_dot}
 (nil))
(insn 33 32 10 2 (set (reg:DI 3 3 [125])
(lshiftrt:DI (reg:DI 3 3 [125])
(const_int 29 [0x1d]))) "pr102239.c":4:6 278 {lshrdi3}
 (nil))
(jump_insn 10 33 11 2 (set (pc)
(if_then_else (eq (reg:CC 100 0 [123])
(const_int 0 [0]))
(label_ref 15)
(pc))) "pr102239.c":4:6 868 {*cbranch}
 (int_list:REG_BR_PROB 536870916 (nil))
 -> 15)


rotldi3_mask_dot is what you mentioned in c#1, it is a shifted result and does
not matter for comparing to 0:

> *rotl3_mask_dot cannot do this either; the base and the dot2 of that
> cannot be done, they return a shifted result, but that doesn't matter for
> comparing it to 0.  So we should add a specialised version.

What specialized version to add?

[Bug target/102811] vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c

2021-11-28 Thread crazylht at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102811

--- Comment #20 from Hongtao.liu  ---
(In reply to Uroš Bizjak from comment #18)
> (In reply to Uroš Bizjak from comment #17)
> > (In reply to Hongtao.liu from comment #16)
> > 
> > > ix86_expand_vector_set is mainly used by vec_set_optab which exactly takes
> > > target as both input and output, it seems we can't create a new target for
> > > that.
> > 
> > OK, let's try to optimize it with gen_pinsr, as you proposed.
> > 
> > (It looks that the add-on patch from Comment #6 will generate VPBLEND in
> > this case, too.)
> 
> We should manually generate vinsertps from truncsfhf2, too. There is no
> point to call ix86_expand_vector_set if we already know the instruction. It
> will use vec_set_0 insn pattern, which has quite some
> alternatives.

For AVX2, your attached patch will optimize

vpxor   %xmm2, %xmm2, %xmm2
-   vpbroadcastw%xmm1, %xmm1
-   vpbroadcastw%xmm0, %xmm0
vpblendw$1, %xmm0, %xmm2, %xmm0
vpblendw$1, %xmm1, %xmm2, %xmm2
vcvtph2ps   %xmm2, %xmm2

Since upper bits of xmm1/xmm0 are not selected by vpblendw.

[Bug rtl-optimization/50677] volatile forces load into register

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=50677

Andrew Pinski  changed:

   What|Removed |Added

   Severity|normal  |enhancement
   Keywords||missed-optimization

[Bug middle-end/60089] Complex arithmetic instructions

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60089

Andrew Pinski  changed:

   What|Removed |Added

   Keywords||missed-optimization

--- Comment #5 from Andrew Pinski  ---
The way we model these instructions these days is using vector modes.

[Bug middle-end/53875] calls to const functions are eliminated at -O0

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53875

Andrew Pinski  changed:

   What|Removed |Added

 CC||roger at nextmovesoftware dot 
com

--- Comment #5 from Andrew Pinski  ---
diff --git a/gcc/calls.c b/gcc/calls.c
index 27b59f26ad3..f23dde58671 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -2735,7 +2735,8 @@ expand_call (tree exp, rtx target, int ignore)
   && (!(flags & ECF_LOOPING_CONST_OR_PURE))
   && (flags & ECF_NOTHROW)
   && (ignore || target == const0_rtx
- || TYPE_MODE (rettype) == VOIDmode))
+ || TYPE_MODE (rettype) == VOIDmode)
+  && !optimize)
 {
   bool volatilep = false;
   tree arg;

[Bug middle-end/53875] calls to const functions are eliminated at -O0

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53875

--- Comment #4 from Andrew Pinski  ---
(In reply to Andrew Pinski from comment #3)
> The code which introduced this was
> g:8c6a82695b85f8ed74cdc67f2cf74c5a62d0d91d .

https://gcc.gnu.org/pipermail/gcc-patches/2003-May/104797.html

[Bug middle-end/53875] calls to const functions are eliminated at -O0

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53875

Andrew Pinski  changed:

   What|Removed |Added

  Known to work||3.3.3
  Known to fail||3.4.0

--- Comment #3 from Andrew Pinski  ---
The code which introduced this was g:8c6a82695b85f8ed74cdc67f2cf74c5a62d0d91d .

[Bug target/102811] vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c

2021-11-28 Thread crazylht at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102811

--- Comment #19 from Hongtao.liu  ---
(In reply to Uroš Bizjak from comment #17)
> (In reply to Hongtao.liu from comment #16)
> 
> > ix86_expand_vector_set is mainly used by vec_set_optab which exactly takes
> > target as both input and output, it seems we can't create a new target for
> > that.
> 
> OK, let's try to optimize it with gen_pinsr, as you proposed.
> 
> (It looks that the add-on patch from Comment #6 will generate VPBLEND in
> this case, too.)

I think your attached patch is a separate optimization; the newly added
alternatives which generate VPBLEND extend the pattern to accept sse register
for the inserted value, currently we only have "rm".

[Bug middle-end/59711] ICE in force_constant_size, at gimplify.c:619 with variably-modified return type

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59711

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |6.0
   See Also||https://gcc.gnu.org/bugzill
   ||a/show_bug.cgi?id=70093
 Resolution|--- |FIXED
   Keywords||ice-on-valid-code
 Status|NEW |RESOLVED

--- Comment #9 from Andrew Pinski  ---
Fixed in GCC 6, by the patch which fixed PR 70093.

[Bug c++/103460] New: GCC rejected operator[](auto[]...) after P2128

2021-11-28 Thread hewillk at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103460

Bug ID: 103460
   Summary: GCC rejected operator[](auto[]...) after P2128
   Product: gcc
   Version: 12.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c++
  Assignee: unassigned at gcc dot gnu.org
  Reporter: hewillk at gmail dot com
  Target Milestone: ---

This should be well-formed after P2128, please correct me if I am wrong.

struct S {
  void operator[](auto[]...);
};

https://godbolt.org/z/renf6nePj

[Bug fortran/50463] [4.6/4.7 Regression] -ftree-dse leeds to wrong code with gfortran

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=50463

Andrew Pinski  changed:

   What|Removed |Added

 CC||strasbur at chkw386 dot 
ch.pwr.wro
   ||c.pl

--- Comment #8 from Andrew Pinski  ---
*** Bug 58270 has been marked as a duplicate of this bug. ***

[Bug middle-end/58270] Wrong code while accessing trailing array elements in a global common structure

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58270

Andrew Pinski  changed:

   What|Removed |Added

   See Also||https://gcc.gnu.org/bugzill
   ||a/show_bug.cgi?id=53086
 Status|NEW |RESOLVED
 Resolution|--- |DUPLICATE

--- Comment #22 from Andrew Pinski  ---
Invalid as mentioned and a dup as mentioned.

*** This bug has been marked as a duplicate of bug 50463 ***

[Bug middle-end/50808] Diagnostic output at expansion time should be moved earlier.

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=50808

Andrew Pinski  changed:

   What|Removed |Added

   Last reconfirmed|2011-10-20 00:00:00 |2021-11-28

--- Comment #4 from Andrew Pinski  ---
>* gcc.dg/noncompile/invalid_asm.c: Likewise.
>* gcc.dg/noncompile/920507-1.c: Likewise.

These still fail with non-fat LTO.
The rest I think should not pass with non-fat LTO.

[Bug ipa/46554] Less inlining leads to CSiBE regression

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46554

Andrew Pinski  changed:

   What|Removed |Added

  Component|middle-end  |ipa
   Keywords||missed-optimization
 CC||marxin at gcc dot gnu.org

--- Comment #3 from Andrew Pinski  ---
Many things have changed since GCC 4.6 with respect to the inliner, has this
been fixed?

[Bug c++/97681] noinline attribute ignored on constexpr function

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97681

Andrew Pinski  changed:

   What|Removed |Added

   Keywords||wrong-debug
   See Also||https://gcc.gnu.org/bugzill
   ||a/show_bug.cgi?id=93008

--- Comment #8 from Andrew Pinski  ---
Plus newer versions of C++ are relaxing constexpr even more and GCC added
-fimplicit-constexpr which enables implicit constexpr for inline functions. 

Also constexpr has an implicit inline too, see PR 93008.

Re: [PATCH] rs6000/test: Add emulated gather test case

2021-11-28 Thread Kewen.Lin via Gcc-patches
on 2021/11/27 上午12:24, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Nov 25, 2021 at 11:20:57AM +0800, Kewen.Lin wrote:
>> This patch is to add a test case similar to the one in i386
>> to add testing coverage for 510.parest_r hotspots.
> 
>> gcc/testsuite/ChangeLog:
>>  * gcc.target/powerpc/vect-gather-1.c: New test.
> 
> This is okay for trunk.  Thanks!
> 

Thanks Segher!  Committed as r12-5569.

BR,
Kewen


[Bug c++/93008] Need a way to make inlining heuristics ignore whether a function is inline

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93008

Andrew Pinski  changed:

   What|Removed |Added

   Severity|normal  |enhancement

[Bug c++/65503] g++ string array in struct crash

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65503

Andrew Pinski  changed:

   What|Removed |Added

   See Also||https://gcc.gnu.org/bugzill
   ||a/show_bug.cgi?id=92385

--- Comment #4 from Andrew Pinski  ---
I suspect PR 92385 is a similar issue as here.

[Bug other/103021] Make the path to etags used in the build system configurable

2021-11-28 Thread egallager at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103021

Eric Gallager  changed:

   What|Removed |Added

   Keywords||patch
URL||https://gcc.gnu.org/piperma
   ||il/gcc-patches/2021-Novembe
   ||r/585614.html

--- Comment #2 from Eric Gallager  ---
Patch posted:
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/585614.html

[PATCH] Make the path to etags used in the build system configurable [PR103021]

2021-11-28 Thread Eric Gallager via Gcc-patches
The attached patch allows users to specify a path to their `etags`
executable for use when doing `make tags`, which is meant to close PR
other/103021: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103021
I based this patch off of this one from upstream automake:
https://git.savannah.gnu.org/cgit/automake.git/commit/m4?id=d2ccbd7eb38d6a4277d6f42b994eb5a29b1edf29
This means that I just supplied variables that the user can override
for the tags programs, rather than having the configure scripts
actually check for them. I handle etags and ctags separately because
the intl subdirectory has separate targets for them. Tested with `make
tags`; the changes I made work successfully, but some of the
subdirectories still have broken tags targets, so I had to switch to
`make -k tags` part way through. This isn't because of anything I did,
though; the `-k` flag is only necessary because of errors that were
already there before I touched anything. Also note that this patch
only affects the subdirectories that use handwritten Makefiles; the
ones that use automake will have to wait until we update the version
of automake used to be 1.16.4 or newer before they'll be fixed.


patch-configurable-etags.diff
Description: Binary data


[Bug rtl-optimization/98782] [11/12 Regression] Bad interaction between IPA frequences and IRA resulting in spills due to changes in BB frequencies

2021-11-28 Thread jiangning.liu at amperecomputing dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98782

--- Comment #7 from Jiangning Liu  ---
Without reverting the commit g:1118a3ff9d3ad6a64bba25dc01e7703325e23d92, we
still see exchange2 performance issue for aarch64. BTW, we have been using
-fno-inline-functions-called-once to get the best performance number for
exchange2.

[PATCH] Fix regression introduced by r12-5536.

2021-11-28 Thread liuhongt via Gcc-patches
There're several failures reported in [1]:
1.  unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
%vpextrw should be used in output templates.
2. ICE in get_attr_memory for movhi_internal since some alternatives
are marked as TYPE_SSELOG.
Explicitly set memory_attr for those alternatives.

Also this patch fixes a typo and some latent bugs which are related to
moving HImode from/to sse register w/o TARGET_AVX512FP16.

For optimization issues discussed in PR102811, I'll create another patch for
it.
[1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html


Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and
x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake}
Ok for trunk?

gcc/ChangeLog:

* config/i386/i386.c (ix86_secondary_reload): Without
TARGET_SSE4_1, General register is needed to move HImode from
sse register to memory.
* config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of
pextrw in output templates.
* config/i386/i386.md (movhi_internal): Ditto, also fix typo of
MEM_P (operands[1]) and adjust memory/mode/prefix/type
attribute for alternatives related to sse register.
---
 gcc/config/i386/i386.c  |  2 +-
 gcc/config/i386/i386.md | 44 ++---
 gcc/config/i386/sse.md  |  6 +++---
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3dedf522c42..7cf599f57f7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t 
rclass,
 }
 
   /* Require movement to gpr, and then store to memory.  */
-  if (mode == HFmode
+  if ((mode == HFmode || mode == HImode)
   && !TARGET_SSE4_1
   && SSE_CLASS_P (rclass)
   && !in_p && MEM_P (x))
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 68606e57e60..2cb3e727588 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal"
 case TYPE_SSELOG:
   if (SSE_REG_P (operands[0]))
return MEM_P (operands[1])
- ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
- : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+ ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
+ : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
   else
-   return MEM_P (operands[1])
- ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
- : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+   return MEM_P (operands[0])
+ ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
+ : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
 
 case TYPE_MSKLOG:
   if (operands[1] == const0_rtx)
@@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal"
   ]
   (const_string "*")))
(set (attr "type")
- (cond [(eq_attr "alternative" "9,10,11,12,13")
+ (cond [(eq_attr "alternative" "9,10,12,13")
  (if_then_else (match_test "TARGET_AVX512FP16")
(const_string "ssemov")
(const_string "sselog"))
(eq_attr "alternative" "4,5,6,7")
  (const_string "mskmov")
+   (eq_attr "alternative" "11")
+ (const_string "ssemov")
(eq_attr "alternative" "8")
  (const_string "msklog")
(match_test "optimize_function_for_size_p (cfun)")
@@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal"
  (const_string "imovx")
   ]
   (const_string "imov")))
+(set (attr "memory")
+(cond [(eq_attr "alternative" "9,10")
+ (const_string "none")
+   (eq_attr "alternative" "12")
+ (const_string "load")
+   (eq_attr "alternative" "13")
+ (const_string "store")
+   ]
+   (const_string "*")))
 (set (attr "prefix")
-  (if_then_else (eq_attr "alternative" "4,5,6,7,8")
-   (const_string "vex")
-   (const_string "orig")))
+(cond [(eq_attr "alternative" "9,10,11,12,13")
+ (const_string "maybe_evex")
+   (eq_attr "alternative" "4,5,6,7,8")
+ (const_string "vex")
+  ]
+  (const_string "orig")))
 (set (attr "mode")
   (cond [(eq_attr "type" "imovx")
   (const_string "SI")
+(eq_attr "alternative" "9,10,12,13")
+  (if_then_else (match_test "TARGET_AVX512FP16")
+(const_string "HI")
+(const_string "TI"))
 (eq_attr "alternative" "11")
-  (const_string "HF")
+  (if_then_else (match_test "TARGET_AVX512FP16")
+(const_string "HF")
+(const_string "SF"))
 (and (eq_attr "alternative" "1,2")
  (match_operand:HI 1 "aligned_operand"))
   (const_string "SI")
@@ -3791,9 +3811,9 @@ (define_insn "*movhf_internal"
   ? "pinsrw\t{$0, %1, 

Re: [PATCH] tree-optimization: [PR101540] Simplify CONSTRUCTOR for vector(1) to be VCE

2021-11-28 Thread Andrew Pinski via Gcc-patches
On Sun, Nov 28, 2021 at 12:25 PM Jeff Law via Gcc-patches
 wrote:
>
>
>
> On 11/28/2021 10:56 AM, apinski--- via Gcc-patches wrote:
> > From: Andrew Pinski 
> >
> > This just adds a simplification to simplify_vector_constructor for
> > vector of 1 element to be VCE which should reduce memory usage in
> > the compiler and maybe allow for some more optimizations.
> >
> > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> >   PR tree-optimization/101540
> >
> > gcc/ChangeLog:
> >
> >   * tree-ssa-forwprop.c (simplify_vector_constructor):
> >   Simplify constructor of vector of 1 element to just
> >   be a VIEW_CONVERT_EXPR.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * gcc.dg/tree-ssa/pr101540-1.c: New test.
> So why generate a VCE here if the type conversion is useless?  Why not
> just a NOP_EXPR?  Is there something special about converting between
> the element type and the outer vector type that requires VCE rather than
> NOP_EXPR?  Neither an ACK nor a NAK, just trying to understand it a bit better.


Because right now tree-cfg.c has this check for vector types for NOP_EXPR:
/* Allow conversions between vectors with the same number of elements,
   provided that the conversion is OK for the element types too.  */
if (VECTOR_TYPE_P (lhs_type)
&& VECTOR_TYPE_P (rhs1_type)
&& known_eq (TYPE_VECTOR_SUBPARTS (lhs_type),
 TYPE_VECTOR_SUBPARTS (rhs1_type)))
  {
lhs_type = TREE_TYPE (lhs_type);
rhs1_type = TREE_TYPE (rhs1_type);
  }
else if (VECTOR_TYPE_P (lhs_type) || VECTOR_TYPE_P (rhs1_type))
  {
error ("invalid vector types in nop conversion");
debug_generic_expr (lhs_type);
debug_generic_expr (rhs1_type);
return true;
  }

We can change this check here for NOP_EXPR and vector types but VCE is
still a nop in most cases and handled as such really. But I wonder if
the rest of the compiler is ready for it though.

Thanks,
Andrew Pinski

>
> Jeff
>
>


[Bug other/19089] Environment variable TMP may yield gcc: abort with internal error

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19089

Andrew Pinski  changed:

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

--- Comment #7 from Andrew Pinski  ---
Fixed. Sorry it took so long in fixing this issue.

[Bug other/19089] Environment variable TMP may yield gcc: abort with internal error

2021-11-28 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19089

--- Comment #6 from CVS Commits  ---
The trunk branch has been updated by Andrew Pinski :

https://gcc.gnu.org/g:68332ab7ec58a89660db82569c5f4c2251d59741

commit r12-5568-g68332ab7ec58a89660db82569c5f4c2251d59741
Author: Andrew Pinski 
Date:   Sat Nov 27 18:16:50 2021 -0800

Fix PR 19089: Environment variable TMP may yield gcc: abort

Even though I cannot reproduce the ICE any more, this is still
a bug. We check already to see if we can access the directory
but never check to see if the path is actually a directory.

This adds the check and now we reject the file as not usable
as a tmp directory.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

libiberty/ChangeLog:

* make-temp-file.c (try_dir): Check to see if the dir
is actually a directory.

Re: [PATCH] Fix PR 19089: Environment variable TMP may yield gcc: abort

2021-11-28 Thread Andrew Pinski via Gcc-patches
On Sun, Nov 28, 2021 at 12:14 PM Jeff Law via Gcc-patches
 wrote:
>
>
>
> On 11/27/2021 7:49 PM, apinski--- via Gcc-patches wrote:
> > From: Andrew Pinski 
> >
> > Even though I cannot reproduce the ICE any more, this is still
> > a bug. We check already to see if we can access the directory
> > but never check to see if the path is actually a directory.
> >
> > This adds the check and now we reject the file as not usable
> > as a tmp directory.
> >
> > OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
> >
> > libiberty/ChangeLog:
> >
> >   * make-temp-file.c (try_dir): Check to see if the dir
> >   is actually a directory.
> > ---
> >   libiberty/make-temp-file.c | 16 +++-
> >   1 file changed, 15 insertions(+), 1 deletion(-)
> >
> > diff --git a/libiberty/make-temp-file.c b/libiberty/make-temp-file.c
> > index 31f87fbcfde..11eb03d12ec 100644
> > --- a/libiberty/make-temp-file.c
> > +++ b/libiberty/make-temp-file.c
> > @@ -39,6 +39,10 @@ Boston, MA 02110-1301, USA.  */
> >   #if defined(_WIN32) && !defined(__CYGWIN__)
> > >   #include <windows.h>
> >   #endif
> > +#if HAVE_SYS_STAT_H
> > > +#include <sys/stat.h>
> > +#endif
> > +
> >
> >   #ifndef R_OK
> >   #define R_OK 4
> > @@ -76,7 +80,17 @@ try_dir (const char *dir, const char *base)
> >   return base;
> > if (dir != 0
> > && access (dir, R_OK | W_OK | X_OK) == 0)
> > -return dir;
> > +{
> > +  /* Check to make sure dir is actually a directory. */
> > +#ifdef S_ISDIR
> > +  struct stat s;
> > > +  if (stat(dir, &s))
> Formatting nit, missing whitespace between stat and open paren.
>
> Presumably this doesn't fix the problem in the case where S_ISDIR is not
> defined.  But it's still an improvement.  OK with the nit fixed.

Correct, though I don't know of any host where S_ISDIR is not defined.
Mingw has them defined. So does cygwin. glibc (and all libc on Linux)
has them defined; Solaris and AIX have them defined. So does Mac OS X.


MSVC does not define them but we don't support MSVC to compile GCC so
that should not be an issue.

Thanks,
Andrew

>
> jeff
>


[Bug debug/24551] [meta-bug] -feliminate-unused-debug-types issues

2021-11-28 Thread egallager at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=24551

Eric Gallager  changed:

   What|Removed |Added

 CC||egallager at gcc dot gnu.org,
   ||patrickdepinguin at gmail dot 
com

--- Comment #6 from Eric Gallager  ---
(In reply to Eric Gallager from comment #5)
> (In reply to Thomas De Schampheleire from comment #4)
> > Could it not be that #14167 is now fixed after fixing #86964 ?
> 
> is bug 86964 actually fixed? It's still open...

Never mind; NOW it's fixed...

Re: [committed 03/12] d: Insert null terminator in obstack buffers

2021-11-28 Thread Iain Buclaw via Gcc-patches
Excerpts from Iain Buclaw's message of November 26, 2021 1:35 pm:
> Excerpts from Martin Liška's message of November 25, 2021 3:09 pm:
>> On 7/30/21 13:01, Iain Buclaw via Gcc-patches wrote:
>>> |Covers cases where functions that handle the extracted strings ignore the 
>>> explicit length. This isn't something that's known to happen in the current 
>>> front-end, but the self-hosted front-end has been observed to do this in 
>>> its conversions between D and C-style strings.|
>> 
>> Can you please cherry pick this for gcc-11 branch as I see nasty output when 
>> using --verbose:
>> 
>> $ gcc /home/marxin/Programming/gcc/gcc/testsuite/gdc.dg/attr_optimize4.d -c 
>> --verbose
>> ...
>> predefs   GNU D_Version2 LittleEndian GNU_DWARF2_Exceptions 
>> GNU_StackGrowsDown GNU_InlineAsm D_LP64 assert D_ModuleInfo D_Exceptions 
>> D_TypeInfo all X86_64 D_HardFloat Posix linux CRuntime_Glibc 
>> CppRuntime_Gcc��...
>> 
>> 
> 
> Ouch, I'll have a look at gcc-9 and 10 too to see if they are the same.
> 

FYI, patch applied cleanly to gcc-11 branch and has been committed.
Saw no regressions on x86_64-linux-gnu in both bootstrap and tests.

Checked other branches, however earlier releases used the dmd
front-end's OutBuffer, so are unaffected.

Iain.


[Bug c++/90885] GCC should warn about 2^16 and 2^32 and 2^64 [-Wxor-used-as-pow]

2021-11-28 Thread egallager at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90885

Eric Gallager  changed:

   What|Removed |Added

Summary|GCC should warn about 2^16  |GCC should warn about 2^16
   |and 2^32 and 2^64   |and 2^32 and 2^64
   ||[-Wxor-used-as-pow]

--- Comment #23 from Eric Gallager  ---
putting -Wxor-used-as-pow in the title since that's what clang went with

[Bug bootstrap/103459] New: Make configury regenerate cleanly with `autoreconf -Wall`

2021-11-28 Thread egallager at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103459

Bug ID: 103459
   Summary: Make configury regenerate cleanly with `autoreconf
-Wall`
   Product: gcc
   Version: 12.0
Status: UNCONFIRMED
  Keywords: build
  Severity: enhancement
  Priority: P3
 Component: bootstrap
  Assignee: unassigned at gcc dot gnu.org
  Reporter: egallager at gcc dot gnu.org
  Target Milestone: ---

autoreconf comes with a -Wall flag (much like gcc's) that warns about
questionable and/or outdated autoconf/automake practices:
https://www.gnu.org/software/autoconf/manual/autoconf-2.69/html_node/autoreconf-Invocation.html
Currently when using this flag, it prints many warnings on gcc's various
configuration files; there are a lot of them, so I'm not going to paste them
here. It would be nice if we could silence these warnings so that we could use
that flag cleanly.

[Bug tree-optimization/103458] [12 Regression] ICE in verify_loop_structure, at cfgloop.c:1736 (error: loop with header 4 not in loop tree)

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103458

Andrew Pinski  changed:

   What|Removed |Added

 Ever confirmed|0   |1
   Keywords||needs-bisection
   Last reconfirmed||2021-11-29
 Status|UNCONFIRMED |NEW

--- Comment #1 from Andrew Pinski  ---
Confirmed. It looks like a latent bug though.

[Bug tree-optimization/103458] [12 Regression] ICE in verify_loop_structure, at cfgloop.c:1736 (error: loop with header 4 not in loop tree)

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103458

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |12.0

[Bug tree-optimization/103456] [12 Regression] gcc/gcc.c:9502:8: runtime error: load of address 0x0000009f5037 with insufficient space for an object of type 'const char' since r12-5548-g4a2007594cff78

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103456

Andrew Pinski  changed:

   What|Removed |Added

   Severity|normal  |blocker

[Bug tree-optimization/81174] bswap not recognized in |= statement

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81174

Andrew Pinski  changed:

   What|Removed |Added

   Keywords||needs-bisection
   See Also||https://gcc.gnu.org/bugzill
   ||a/show_bug.cgi?id=86723

--- Comment #4 from Andrew Pinski  ---
This is fixed on the trunk.  I want to say the  r12-3072-gb320edc0c29c838b00
(PR86723) is what fixed this.

[Bug tree-optimization/103457] boolean operations on bit-fields are not merged

2021-11-28 Thread roland.illig at gmx dot de via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103457

--- Comment #2 from Roland Illig  ---
Cool, thank you for taking this optimization.

Just to give you a bit of background: I discovered this while converting some
of the enum types in BSD Make to proper bitfields, which theoretically should
be possible without affecting the generated code.

https://github.com/NetBSD/src/blob/trunk/usr.bin/make/make.h

It was interesting to play around with this code on https://godbolt.org/,
seeing how differently the available compilers translate this simple code
fragment. That's where the Intel assembler syntax comes from. :)

[Bug c++/90782] internal compiler error: in dependent_type_p, at cp/pt.c:25409

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90782

Andrew Pinski  changed:

   What|Removed |Added

   Keywords||needs-bisection

--- Comment #7 from Andrew Pinski  ---
This looks fixed on the trunk.

[Bug c++/103455] [9/10/11/12 Regression] internal compiler error: in dependent_type_p, at cp/pt.c:27057

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103455

--- Comment #5 from Andrew Pinski  ---
Reduced:
template 
struct rp {
T* operator->() const;
operator T*() const;
template  explicit operator U*() const;
};
struct b {};
typedef void (b::*fptr)();
void foo(rp n, fptr h) {
(n->*h)();
}

[Bug sanitizer/100987] make distclean error "hwasan: No such file or directory"

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100987
Bug 100987 depends on bug 62157, which changed state.

Bug 62157 Summary: make distclean error when libsanitizer is configured not to 
build 'tsan'
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62157

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

[Bug bootstrap/3415] make distclean (in gcc subdirectory) does not clean up all the way

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3415
Bug 3415 depends on bug 62157, which changed state.

Bug 62157 Summary: make distclean error when libsanitizer is configured not to 
build 'tsan'
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62157

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

[Bug sanitizer/62157] make distclean error when libsanitizer is configured not to build 'tsan'

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62157

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |FIXED
   Target Milestone|--- |12.0
 Status|ASSIGNED|RESOLVED

--- Comment #8 from Andrew Pinski  ---
Fixed.

[Bug sanitizer/62157] make distclean error when libsanitizer is configured not to build 'tsan'

2021-11-28 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62157

--- Comment #7 from CVS Commits  ---
The trunk branch has been updated by Andrew Pinski :

https://gcc.gnu.org/g:32377c101934477e3d27fec9c6a22f1c97ccf730

commit r12-5566-g32377c101934477e3d27fec9c6a22f1c97ccf730
Author: Andrew Pinski 
Date:   Sun Nov 28 01:14:59 2021 +

Fix PR 62157: disclean in libsanitizer not working

So what is happening is DIST_SUBDIRS contains the conditional
directories which is wrong, so we need to force DIST_SUBDIRS
to be the same as SUBDIRS as recommended by the automake manual.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
Also now make distclean works inside libsanitizer directory.

libsanitizer/ChangeLog:

PR sanitizer/62157
* Makefile.am: Force DIST_SUBDIRS to be SUBDIRS.
* Makefile.in: Regenerate.
* asan/Makefile.in: Likewise.
* hwasan/Makefile.in: Likewise.
* interception/Makefile.in: Likewise.
* libbacktrace/Makefile.in: Likewise.
* lsan/Makefile.in: Likewise.
* sanitizer_common/Makefile.in: Likewise.
* tsan/Makefile.in: Likewise.
* ubsan/Makefile.in: Likewise.

gcc-12-20211128 is now available

2021-11-28 Thread GCC Administrator via Gcc
Snapshot gcc-12-20211128 is now available on
  https://gcc.gnu.org/pub/gcc/snapshots/12-20211128/
and on various mirrors, see http://gcc.gnu.org/mirrors.html for details.

This snapshot has been generated from the GCC 12 git branch
with the following options: git://gcc.gnu.org/git/gcc.git branch master 
revision 2899d49e3701a4df18a336a680a7095cc99a2229

You'll find:

 gcc-12-20211128.tar.xz   Complete GCC

  SHA256=c201acfa76234f2a6c33075894c88cf0fa1de9cb6709bf14da71d960172b78b2
  SHA1=04e31c1a0ae09183f079f5377291c91644be6d78

Diffs from 12-20211121 are available in the diffs/ subdirectory.

When a particular snapshot is ready for public consumption the LATEST-12
link is updated and a message is sent to the gcc list.  Please do not use
a snapshot before it has been announced that way.


[Bug tree-optimization/103457] boolean operations on bit-fields are not merged

2021-11-28 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103457

Andrew Pinski  changed:

   What|Removed |Added

   Keywords||missed-optimization
   Assignee|unassigned at gcc dot gnu.org  |pinskia at gcc dot 
gnu.org
 Ever confirmed|0   |1
   Last reconfirmed||2021-11-28
   Severity|normal  |enhancement
 Status|UNCONFIRMED |ASSIGNED

--- Comment #1 from Andrew Pinski  ---
Mine for GCC 13. There are other bugs which are similar too.

[PATCH] Extend usage of user hint in _Hashtable

2021-11-28 Thread François Dumont via Gcc-patches

    libstdc++: In _Hashtable, use insertion hint as much as possible.

    Make use in unordered containers of the user provided hint iterator 
as much as possible.


    Hint is now used:
    - As a hint for allocation, in order to limit memory fragmentation when
    allocator is making use of it.
    - For unordered_set/unordered_map we check if it does not match the 
key of the

    element to insert, before computing the hash code.
    - For unordered_multiset/unordered_multimap, if equals to the key 
of the element
    to insert, the hash code is taken from the hint so that we can take 
advantage of

    the potential hash code cache.

    Moreover, in _M_count_tr and _M_equal_range_tr reuse the first 
matching node key
    to check for other matching nodes to avoid any temporary 
instantiations.


    libstdc++-v3/ChangeLog:

    * include/bits/hashtable_policy.h 
(_NodeBuilder<>::_S_build): Add _NodePtr template

    parameter.
    (_ReuseOrAllocNode::operator()): Add __node_ptr parameter.
    (_AllocNode::operator()): Likewise.
    (_Insert_base::try_emplace): Adapt to use hint.
    (_Hash_code_base<>::_M_hash_code(const 
_Hash_node_value<>&)): New.
    (_Hashtable_base<>::_M_equals<>(const _Kt&, const 
_Hash_node_value<>&)): New.
    (_Hashtable_base<>::_M_equals<>(const _Kt&, __hash_code, 
const _Hash_node_value<>&)):

    Adapt, use latter.
    (_Hashtable_base<>::_M_equals_tr<>(const _Kt&, const 
_Hash_node_value<>&)): New.
    (_Hashtable_base<>::_M_equals_tr<>(const _Kt&, __hash_code, 
const _Hash_node_value<>&)):

    Adapt, use latter.
(_Hashtable_alloc<>::_M_allocate_node(__node_ptr, _Args&&...)): Add 
__node_ptr parameter.

    * include/bits/hashtable.h
(_Hashtable<>::_Scope_node<>(__hashtable_alloc*, __node_ptr, _Args&&...)):
    Add __node_ptr parameter.
    (_Hashtable<>::_M_get_node_hint(size_type, __node_ptr)): New.
    (_Hashtable<>::_M_emplace_unique(const_iterator, 
_Args&&...)): New.
    (_Hashtable<>::_M_emplace_multi(const_iterator, 
_Args&&...)): New.

    (_Hashtable<>::_M_emplace()): Adapt to use latter.
    (_Hashtable<>::_M_insert_unique(const_iterator, _Kt&&, 
_Arg&&, const _NodeGenerator&)):
    (_Hashtable<>::_M_reinsert_node(const_iterator, 
node_type&&)): Add const_iterator.

    Add const_iterator parameter.
    * include/bits/unordered_map.h 
(unordered_map<>::insert(node_type&&)): Pass cend as

    hint.
    (unordered_map<>::insert(const_iterator, node_type&&)): 
Adapt to use hint.
    * include/bits/unordered_set.h 
(unordered_set<>::insert(node_type&&)): Pass cend as

    hint.
    (unordered_set<>::insert(const_iterator, node_type&&)): 
Adapt to use hint.


Tested under Linux x86_64.

Ok to commit ?

François

diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index 6e2d4c10cfe..5010cefcd77 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -301,9 +301,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 	// Allocate a node and construct an element within it.
 	template
-	  _Scoped_node(__hashtable_alloc* __h, _Args&&... __args)
+	  _Scoped_node(__hashtable_alloc* __h,
+		   __node_ptr __hint, _Args&&... __args)
 	  : _M_h(__h),
-	_M_node(__h->_M_allocate_node(std::forward<_Args>(__args)...))
+	_M_node(__h->_M_allocate_node(__hint,
+	  std::forward<_Args>(__args)...))
 	  { }
 
 	// Destroy element and deallocate node.
@@ -818,6 +820,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  return nullptr;
 	}
 
+  // Gets a hint after which a node should be allocated given a bucket.
+  __node_ptr
+  _M_get_node_hint(size_type __bkt, __node_ptr __hint = nullptr) const
+  {
+	__node_base_ptr __node;
+	if (__node = _M_buckets[__bkt])
+	  return __node != &_M_before_begin
+	? static_cast<__node_ptr>(__node) : __hint;
+
+	return __hint;
+  }
+
   // Insert a node at the beginning of a bucket.
   void
   _M_insert_bucket_begin(size_type, __node_ptr);
@@ -846,26 +860,40 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
 	std::pair
-	_M_emplace(true_type __uks, _Args&&... __args);
+	_M_emplace_unique(const_iterator, _Args&&... __args);
 
   template
 	iterator
-	_M_emplace(false_type __uks, _Args&&... __args)
-	{ return _M_emplace(cend(), __uks, std::forward<_Args>(__args)...); }
+	_M_emplace_multi(const_iterator, _Args&&... __args);
+
+  template
+	std::pair
+	_M_emplace(true_type /*__uks*/, _Args&&... __args)
+	{ return _M_emplace_unique(cend(), std::forward<_Args>(__args)...); }
 
-  // Emplace with hint, useless when keys are unique.
   template
 	iterator
-	_M_emplace(const_iterator, true_type __uks, _Args&&... __args)
-	{ return _M_emplace(__uks, std::forward<_Args>(__args)...).first; }
+	_M_emplace(false_type 

[Bug target/103302] wrong code with -fharden-compares

2021-11-28 Thread aoliva at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103302

--- Comment #5 from Alexandre Oliva  ---
Hello, Jim,

Thanks for the investigation, that's useful.  I guess the register allocator
shouldn't choose to coalesce registers when there's a clobber afterwards, or it
should drop the clobber, since otherwise it clobbers the coalesced set.

Anyway, I'm reworking the asm that prevents optimization because of bug 103149
and bug 103097, it might end up avoiding this unfortunate situation.  We'll
see...

Re: [PATCH] tree-optimization: [PR101540] Simplify CONSTRUCTOR for vector(1) to be VCE

2021-11-28 Thread Jeff Law via Gcc-patches




On 11/28/2021 10:56 AM, apinski--- via Gcc-patches wrote:

From: Andrew Pinski 

This just adds a simplification to simplify_vector_constructor for
vector of 1 element to be VCE which should reduce memory usage in
the compiler and maybe allow for some more optimizations.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/101540

gcc/ChangeLog:

* tree-ssa-forwprop.c (simplify_vector_constructor):
Simplify constructor of vector of 1 element to just
be a VIEW_CONVERT_EXPR.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr101540-1.c: New test.
So why generate a VCE here if the type conversion is useless?  Why not 
just a NOP_EXPR?  Is there something special about converting between 
the element type and the outer vector type that requires VCE rather than 
NOP_EXPR?  Neither an ACK nor a NAK, just trying to understand it a bit better.


Jeff




Re: [PATCH] Fix PR 19089: Environment variable TMP may yield gcc: abort

2021-11-28 Thread Jeff Law via Gcc-patches




On 11/27/2021 7:49 PM, apinski--- via Gcc-patches wrote:

From: Andrew Pinski 

Even though I cannot reproduce the ICE any more, this is still
a bug. We check already to see if we can access the directory
but never check to see if the path is actually a directory.

This adds the check and now we reject the file as not usable
as a tmp directory.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

libiberty/ChangeLog:

* make-temp-file.c (try_dir): Check to see if the dir
is actually a directory.
---
  libiberty/make-temp-file.c | 16 +++-
  1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/libiberty/make-temp-file.c b/libiberty/make-temp-file.c
index 31f87fbcfde..11eb03d12ec 100644
--- a/libiberty/make-temp-file.c
+++ b/libiberty/make-temp-file.c
@@ -39,6 +39,10 @@ Boston, MA 02110-1301, USA.  */
  #if defined(_WIN32) && !defined(__CYGWIN__)
  #include <windows.h>
  #endif
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+
  
  #ifndef R_OK

  #define R_OK 4
@@ -76,7 +80,17 @@ try_dir (const char *dir, const char *base)
  return base;
if (dir != 0
&& access (dir, R_OK | W_OK | X_OK) == 0)
-return dir;
+{
+  /* Check to make sure dir is actually a directory. */
+#ifdef S_ISDIR
+  struct stat s;
+  if (stat(dir, &s))

Formatting nit, missing whitespace between stat and open paren.

Presumably this doesn't fix the problem in the case where S_ISDIR is not 
defined.  But it's still an improvement.  OK with the nit fixed.


jeff



Re: [PATCH] Fix PR 62157: disclean in libsanitizer not working

2021-11-28 Thread Jeff Law via Gcc-patches




On 11/27/2021 6:19 PM, apinski--- via Gcc-patches wrote:

From: Andrew Pinski 

So what is happening is DIST_SUBDIRS contains the conditional
directories which is wrong, so we need to force DIST_SUBDIRS
to be the same as SUBDIRS as recommended by the automake manual.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
Also now make distclean works inside libsanitizer directory.

libsanitizer/ChangeLog:

PR sanitizer/62157
* Makefile.am: Force DIST_SUBDIRS to be SUBDIRS.
* Makefile.in: Regenerate.
* asan/Makefile.in: Likewise.
* hwasan/Makefile.in: Likewise.
* interception/Makefile.in: Likewise.
* libbacktrace/Makefile.in: Likewise.
* lsan/Makefile.in: Likewise.
* sanitizer_common/Makefile.in: Likewise.
* tsan/Makefile.in: Likewise.
* ubsan/Makefile.in: Likewise.

OK
jeff



[Bug tree-optimization/103458] New: [12 Regression] ICE in verify_loop_structure, at cfgloop.c:1736 (error: loop with header 4 not in loop tree)

2021-11-28 Thread asolokha at gmx dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103458

Bug ID: 103458
   Summary: [12 Regression] ICE in verify_loop_structure, at
cfgloop.c:1736 (error: loop with header 4 not in loop
tree)
   Product: gcc
   Version: 12.0
Status: UNCONFIRMED
  Keywords: ice-on-valid-code
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: asolokha at gmx dot com
  Target Milestone: ---

gcc-12.0.0-alpha20211121 snapshot (g:da17c304e22ba256eba0b03710aa329115163b08)
ICEs when compiling the following testcase w/ -O2:

__attribute__ ((returns_twice)) int
bar (void);

void
foo (int *p, int x)
{
  *p = 0;
  while (*p < 1)
{
  x = 0;
  while (x < 1)
bar ();

  x /= 0;
}

  foo (p, x);
}

% gcc-12.0.0 -O2 -w -c xiksrvsw.c
xiksrvsw.c: In function 'foo':
xiksrvsw.c:5:1: error: loop with header 4 not in loop tree
5 | foo (int *p, int x)
  | ^~~
during GIMPLE pass: cddce
xiksrvsw.c:5:1: internal compiler error: in verify_loop_structure, at
cfgloop.c:1736
0x6816e7 verify_loop_structure()
   
/var/tmp/portage/sys-devel/gcc-12.0.0_alpha20211121/work/gcc-12-20211121/gcc/cfgloop.c:1736
0xdb8457 execute_function_todo
   
/var/tmp/portage/sys-devel/gcc-12.0.0_alpha20211121/work/gcc-12-20211121/gcc/passes.c:2058
0xdb8cbe execute_todo
   
/var/tmp/portage/sys-devel/gcc-12.0.0_alpha20211121/work/gcc-12-20211121/gcc/passes.c:2096

Re: [RFC][PATCH] c++/46476 - implement -Wunreachable-code-return

2021-11-28 Thread Jeff Law via Gcc-patches




On 11/26/2021 5:18 AM, Richard Biener via Gcc-patches wrote:

This implements a subset of -Wunreachable-code, unreachable code
after a return stmt.  Contrary to the previous attempt at CFG
construction time this implements the bits during GIMPLE lowering
where there are still all GIMPLE return stmts in the IL.

The lowering phase keeps track of whether stmts can fallthru
which is used to determine if the following stmt is reachable.
The implementation only considers labels here.

The fallthru flag is transparently extended to allow tracking
a reason for non-fallthruness which is used to mark returns.

This patch runs into the same stray return/gcc_unreachable as the
previous one and thus requires cleanup across the GCC code base
which seems controversial.  So I'm putting this on hold unless
I receive some OK for cleanup in any way, meaning this isn't
going to make stage3.

Sorry.

Richard.

2021-11-26  Richard Biener  

PR c++/46476
gcc/cp/
* decl.c (finish_function): Set input_location to
BUILTINS_LOCATION around the code building the return 0
for main().
* cp-gimplify.c (genericize_if_stmt): Avoid optimizing if (true)
and if (false) when -Wunreachable-code-return is in effect.

gcc/
* common.opt (Wunreachable-code): Re-enable.
(Wunreachable-code-return): New diagnostic, enabled by
-Wextra and -Wunreachable-code.
* doc/invoke.texi (Wunreachable-code): Document.
(Wunreachable-code-return): Likewise.
* gimple-low.c: Include diagnostic.h.
(struct cft_reason): New.
(lower_data::cannot_fallthru): Make a cft_reason.
(lower_stmt): Diagnose unreachable stmts after a return.
* Makefile.in (insn-emit.o-warn): Disable
-Wunreachable-code-return.

gcc/testsuite/
* c-c++-common/Wunreachable-code-return-1.c: New testcase.

I wouldn't object to this moving forward.  I've already ACK'd the cleanups.

Jeff



[Bug tree-optimization/103457] New: boolean operations on bit-fields are not merged

2021-11-28 Thread roland.illig at gmx dot de via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103457

Bug ID: 103457
   Summary: boolean operations on bit-fields are not merged
   Product: gcc
   Version: 12.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: roland.illig at gmx dot de
  Target Milestone: ---

~~~c
#include <stdbool.h>

/* Nine independent boolean flags, each declared as a 1-bit bool bit-field. */
typedef struct GNodeFlagsS {
bool remake:1;
bool childMade:1;
bool force:1;
bool doneWait:1;
bool doneOrder:1;
bool fromDepend:1;
bool doneAllsrc:1;
bool cycle:1;
bool doneCycle:1;
} GNodeFlags;

/*
 * Return true only if every one of the nine flags in FLAGS is clear.
 * The check is written as a chain of per-field && tests, one per bit-field.
 */
bool
GNodeFlags_IsNone(GNodeFlags flags)
{
return !flags.remake
   && !flags.childMade
   && !flags.force
   && !flags.doneWait
   && !flags.doneOrder
   && !flags.fromDepend
   && !flags.doneAllsrc
   && !flags.cycle
   && !flags.doneCycle;
}
~~~

On x86_64, GCC 11.2 generates:

~~~asm
GNodeFlags_IsNone(GNodeFlagsS):
mov eax, edi
and eax, 1
jne .L6
test dil, 2
jne .L1
mov eax, edi
shr ax, 2
and eax, 1
jne .L6
test dil, 8
jne .L1
mov eax, edi
shr ax, 4
and eax, 1
jne .L6
test dil, 32
jne .L1
mov eax, edi
shr ax, 6
and eax, 1
jne .L6
test dil, dil
js  .L1
shr di, 8
mov eax, edi
and eax, 1
xor eax, 1
ret
.L6:
xor eax, eax
.L1:
ret
~~~

ICC 2021.3.0 generates shorter code:

~~~asm
test  edi, 1    #18.10
jne   ..B1.10   # Prob 50%  #18.10
test  edi, 2    #19.13
jne   ..B1.10   # Prob 50%  #19.13
test  edi, 4    #20.13
jne   ..B1.10   # Prob 50%  #20.13
(and so on)
~~~

Many other compilers fail to see the potential for optimizing this code as
well.

Clang is better, it generates:

~~~asm
GNodeFlags_IsNone(GNodeFlagsS): # @GNodeFlags_IsNone(GNodeFlagsS)
test edi, 511
sete al
ret
~~~

Re: [PATCH] x86_64: PR target/100711: Splitters for pandn

2021-11-28 Thread Uros Bizjak via Gcc-patches
On Sun, Nov 28, 2021 at 2:25 PM Roger Sayle  wrote:
>
>
> This patch addresses PR target/100711 by introducing define_split
> patterns so that not/broadcast/pand may be simplified (by combine)
> to broadcast/pandn.  This introduces two splitters one for optimizing
> pandn on TARGET_SSE for V4SI and V2DI, and another for vpandn on
> TARGET_AVX2 for V16QI, V8HI, V32QI, V16HI and V8SI.  Each splitter
> has its own new testcase.
>
> I've also confirmed that not/broadcast/pandn is already getting
> simplified to broadcast/pand by the middle-end optimizers.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check with no new failures.  Ok for mainline?
>
>
> 2021-11-28  Roger Sayle  
>
> gcc/ChangeLog
> PR target/100711
> * config/i386/sse.md (define_split): New splitters to simplify
> not;vec_duplicate;and as vec_duplicate;andn.
>
> gcc/testsuite/ChangeLog
> PR target/100711
> * gcc.target/i386/pr100711-1.c: New test case.
> * gcc.target/i386/pr100711-2.c: New test case.


+;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn
+(define_split
+  [(set (match_operand:VI48_128 0 "register_operand")
+ (and:VI48_128
+  (vec_duplicate:VI48_128
+(not:<ssescalarmode>
+  (match_operand:<ssescalarmode> 1 "register_operand")))
+  (match_operand:VI48_128 2 "register_operand")))]

You can use "vector_operand" here, the resulting PANDN can handle these.

+  "TARGET_SSE && can_create_pseudo_p ()"

This is a combine splitter, so can_create_pseudo_p () is not needed,
because it runs only during the combine phase.

FYI, the combine splitter is somewhat different from a normal splitter;
the important part from the documentation is that the insn is *not*
matched by some define_insn pattern, and the split results in exactly
two patterns:

The insn combiner phase also splits putative insns.  If three insns are
merged into one insn with a complex expression that cannot be matched by
some 'define_insn' pattern, the combiner phase attempts to split the
complex pattern into two insns that are recognized.  Usually it can
break the complex pattern into two patterns by splitting out some
subexpression.  However, in some other cases, such as performing an
addition of a large constant in two insns on a RISC machine, the way to
split the addition into two insns is machine-dependent.

+  [(set (match_dup 3)
+ (vec_duplicate:VI48_128 (match_dup 1)))
+   (set (match_dup 0)
+ (and:VI48_128 (not:VI48_128 (match_dup 3))
+  (match_dup 2)))]
+  "operands[3] = gen_reg_rtx (<MODE>mode);")
+
+;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn
+(define_split
+  [(set (match_operand:VI124_AVX2 0 "register_operand")
+ (and:VI124_AVX2
+  (vec_duplicate:VI124_AVX2
+(not:<ssescalarmode>
+  (match_operand:<ssescalarmode> 1 "register_operand")))
+  (match_operand:VI124_AVX2 2 "register_operand")))]
+  "TARGET_AVX2 && can_create_pseudo_p ()"
+  [(set (match_dup 3)
+ (vec_duplicate:VI124_AVX2 (match_dup 1)))
+   (set (match_dup 0)
+ (and:VI124_AVX2 (not:VI124_AVX2 (match_dup 3))
+ (match_dup 2)))]
+  "operands[3] = gen_reg_rtx (<MODE>mode);")

Same here as above.

+/* { dg-do compile } */
+/* { dg-options "-O2" } */

Please add -msse2 here, 32bit targets do not enable SSE by default,
and please check if they handle DImode long long at all.

Also, please run tests for x86_64 and i386 targets. The testsuite
should be run with:

make -k check RUNTESTFLAGS="--target_board=unix\{,-m32\}"

(Optionally, you can use check-gcc instead of check and/or add
i386.exp after --target_board.)

Uros.

+typedef int v4si __attribute__((vector_size (16)));
+typedef long long v2di __attribute__((vector_size (16)));
+
+v4si foo (int a, v4si b)
+{
+return (__extension__ (v4si) {~a, ~a, ~a, ~a}) & b;
+}
+
+v2di bar (long long a, v2di b)
+{
+return (__extension__ (v2di) {~a, ~a}) & b;
+}

>
> Thanks in advance,
> Roger
> --
>


[Bug c++/103455] [9/10/11/12 Regression] internal compiler error: in dependent_type_p, at cp/pt.c:27057

2021-11-28 Thread stha09 at googlemail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103455

--- Comment #4 from Stephan Hartmann  ---
The last working compiler was gcc-8.1; gcc-8.2 and later produce an ICE.

  1   2   >