[PATCH v2, rs6000] Add a combine pattern for CA minus one [PR95737]
Hi, This patch adds a combine pattern for "CA minus one". As CA only has two values (0 or 1), we could convert the following pattern (sign_extend:DI (plus:SI (reg:SI 98 ca) (const_int -1 [0xffffffffffffffff]))) to (plus:DI (reg:DI 98 ca) (const_int -1 [0xffffffffffffffff])) With this patch, one unnecessary sign extend is eliminated. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot. ChangeLog 2022-01-20 Haochen Gui gcc/ * config/rs6000/rs6000.md (extenddi_ca_minus_one): Define. gcc/testsuite/ * gcc.target/powerpc/pr95737.c: New. patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 6ecb0bd6142..1d8b212962f 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -2358,6 +2358,19 @@ (define_insn "subf3_carry_in_xx" "subfe %0,%0,%0" [(set_attr "type" "add")]) +(define_insn_and_split "*extenddi_ca_minus_one" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (sign_extend:DI (plus:SI (reg:SI CA_REGNO) +(const_int -1] + "" + "#" + "" + [(parallel [(set (match_dup 0) + (plus:DI (reg:DI CA_REGNO) + (const_int -1))) + (clobber (reg:DI CA_REGNO))])] + "" +) (define_insn "@neg2" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c b/gcc/testsuite/gcc.target/powerpc/pr95737.c new file mode 100644 index 000..94320f23423 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c @@ -0,0 +1,10 @@ +/* PR target/95737 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ +/* { dg-final { scan-assembler-not {\mextsw\M} } } */ + + +unsigned long long negativeLessThan (unsigned long long a, unsigned long long b) +{ + return -(a < b); +}
Re: [pushed] c++: layout of aggregate base with DMI [PR103681]
> From: Jason Merrill via Gcc-patches > Date: Fri, 17 Dec 2021 04:26:55 +0100 > C++14 changed the definition of 'aggregate' to allow default member > initializers, but such classes still need to be considered "non-POD for the > purpose of layout" for ABI compatibility with C++11 code. It seems rare to > derive from such a class, as evidenced by how long this bug has > survived (since r216750 in 2014), but it's certainly worth fixing. > > We only warn when we were failing to allocate another field into the > tail padding of the newly aggregate class; this is the only ABI impact. > > This also changes end_of_class to consider all data members, not just empty > data members; that used to be an additional flag, removed in r9-5710, but I > don't see any reason not to always include them. This makes the result of > the function correspond to the ABI nvsize term and its nameless counterpart > that does include virtual bases. > > When looking closely at other users of end_of_class, I realized that we were > assuming that the latter corresponded to the ABI dsize term, but it doesn't > if the class ends with an empty virtual base (in the rare case that the > empty base can't be assigned offset 0), and this matters for layout of > [[no_unique_address]]. So I added another mode that returns the desired > value for that case. I'm not adding a warning for this ABI fix because it's > a C++20 feature. > > Tested x86_64-pc-linux-gnu, applying to trunk. > > PR c++/103681 > > gcc/ChangeLog: > > * common.opt (fabi-version): Add v17. > > gcc/cp/ChangeLog: > > * cp-tree.h (struct lang_type): Add non_pod_aggregate. > (CLASSTYPE_NON_POD_AGGREGATE): New. > * class.c (check_field_decls): Set it. > (check_bases_and_members): Check it. > (check_non_pod_aggregate): New. > (enum eoc_mode): New. > (end_of_class): Always include non-empty fields. > Add eoc_nv_or_dsize mode. > (include_empty_classes, layout_class_type): Adjust. Looks like this caused https://gcc.gnu.org/PR104139. brgds, H-P
[pushed] c++: template-id with current inst qualifier [PR102300]
The patch for PR41723 properly changed one place to look into the current instantiation; now we need to fix this place as well. Tested x86_64-pc-linux-gnu, applying to trunk. PR c++/102300 gcc/cp/ChangeLog: * parser.cc (cp_parser_template_name): Use dependent_scope_p. gcc/testsuite/ChangeLog: * g++.dg/template/nested7.C: New test. --- gcc/cp/parser.cc | 8 gcc/testsuite/g++.dg/parse/no-typename1.C | 2 +- gcc/testsuite/g++.dg/template/nested7.C | 12 3 files changed, 17 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.dg/template/nested7.C diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index 00279c43404..ed219d79dc9 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -18574,7 +18574,7 @@ cp_parser_template_name (cp_parser* parser, : parser->context->object_type); if (scope && TYPE_P (scope) && (!CLASS_TYPE_P (scope) - || (check_dependency_p && dependent_type_p (scope + || (check_dependency_p && dependent_scope_p (scope { /* We're optimizing away the call to cp_parser_lookup_name, but we still need to do this. */ @@ -18667,9 +18667,9 @@ cp_parser_template_name (cp_parser* parser, found = true; } - /* "in a type-only context" */ + /* "that follows the keyword template"..."in a type-only context" */ if (!found && scope - && tag_type != none_type + && (template_keyword_p || tag_type != none_type) && dependentish_scope_p (scope) && cp_parser_nth_token_starts_template_argument_list_p (parser, 1)) found = true; @@ -18680,7 +18680,7 @@ cp_parser_template_name (cp_parser* parser, cp_parser_error (parser, "expected template-name"); return error_mark_node; } - else if (decl == error_mark_node) + else if (!DECL_P (decl) && !is_overloaded_fn (decl)) /* Repeat the lookup at instantiation time. 
*/ decl = identifier; } diff --git a/gcc/testsuite/g++.dg/parse/no-typename1.C b/gcc/testsuite/g++.dg/parse/no-typename1.C index 711c621e26a..f40ea0ec13c 100644 --- a/gcc/testsuite/g++.dg/parse/no-typename1.C +++ b/gcc/testsuite/g++.dg/parse/no-typename1.C @@ -6,6 +6,6 @@ template struct A { template struct B { -A::template B foo(); // { dg-error "" "" { target c++17_down } } +A::template B foo(); }; }; diff --git a/gcc/testsuite/g++.dg/template/nested7.C b/gcc/testsuite/g++.dg/template/nested7.C new file mode 100644 index 000..3a5930c0f31 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/nested7.C @@ -0,0 +1,12 @@ +// PR c++/102300 + +template +struct holder +{ + template struct fn {}; + + struct t1 : fn {}; // pass + struct t2 : holder::fn {}; // fail + struct t3 : holder::template fn {}; // fail + struct t4 : holder::template fn {}; // pass +}; base-commit: 64655e7bdbea6b922f1f759eb9f1730b5e57029f -- 2.27.0
Re: [PATCH] c++: CTAD within alias template [PR91911]
On 1/19/22 17:32, Patrick Palka wrote: On Wed, 19 Jan 2022, Jason Merrill wrote: On 1/3/22 10:24, Patrick Palka wrote: On Wed, 22 Dec 2021, Jason Merrill wrote: On 12/21/21 14:08, Patrick Palka wrote: On Tue, Dec 21, 2021 at 2:03 PM Patrick Palka wrote: On Wed, Jun 30, 2021 at 4:23 PM Jason Merrill wrote: On 6/30/21 4:18 PM, Patrick Palka wrote: On Wed, Jun 30, 2021 at 3:51 PM Jason Merrill wrote: On 6/30/21 11:58 AM, Patrick Palka wrote: On Wed, 30 Jun 2021, Patrick Palka wrote: On Fri, 25 Jun 2021, Jason Merrill wrote: On 6/25/21 1:11 PM, Patrick Palka wrote: On Fri, 25 Jun 2021, Jason Merrill wrote: On 6/24/21 4:45 PM, Patrick Palka wrote: In the first testcase below, during parsing of the alias template ConstSpanType, transparency of alias template specializations means we replace SpanType with SpanType's substituted definition. But this substitution lowers the level of the CTAD placeholder for span(T()) from 2 to 1, and so the later instantiation of ConstSpanType erroneously substitutes this CTAD placeholder with the template argument at level 1 index 0, i.e. with int, before we get a chance to perform the CTAD. In light of this, it seems we should avoid level lowering when substituting through the type-id of a dependent alias template specialization. To that end this patch makes lookup_template_class_1 pass tf_partial to tsubst in this situation. This makes sense, but what happens if SpanType is a member template, so that the levels of it and ConstSpanType don't match? Or the other way around? 
If SpanType is a member template of say the class template A (and thus its level is greater than ConstSpanType): template struct A { template using SpanType = decltype(span(T())); }; template using ConstSpanType = span::SpanType::value_type>; using type = ConstSpanType; then this case luckily works even without the patch because instantiate_class_template now reuses the specialization A::SpanType that was formed earlier during instantiation of A, where we substitute only a single level of template arguments, so the level of the CTAD placeholder inside the defining-type-id of this specialization dropped from 3 to 2, so still more than the level of ConstSpanType. This luck is short-lived though, because if we replace A::SpanType with say A::SpanType then the testcase breaks again (without the patch) because we no longer can reuse that specialization, so we instead form it on the spot by substituting two levels of template arguments (U=int,T=T) into the defining-type-id, causing the level of the placeholder to drop to 1. I think the patch causes its level to remain 3 (though I guess it should really be 2). For the other way around, if ConstSpanType is a member template of say the class template B (and thus its level is greater than SpanType): template using SpanType = decltype(span(T())); template struct B { template using ConstSpanType = span::value_type>; }; using type = B::ConstSpanType; then tf_partial doesn't help here at all; we end up substituting 'int' for the CTAD placeholder... What it seems we need is to _increase_ the level of the CTAD placeholder from 2 to 3 during the dependent substitution.. Hmm, rather than messing with tf_partial, which is apparently only a partial solution, maybe we should just make tsubst never substitute a CTAD placeholder -- they should always be resolved from do_class_deduction, and their level doesn't really matter otherwise. 
(But we'd still want to substitute into the CLASS_PLACEHOLDER_TEMPLATE of the placeholder in case it's a template template parm.) Something like: diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 5107bfbf9d1..dead651ed84 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -15552,7 +15550,8 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl) levels = TMPL_ARGS_DEPTH (args); if (level <= levels - && TREE_VEC_LENGTH (TMPL_ARGS_LEVEL (args, level)) > 0) + && TREE_VEC_LENGTH (TMPL_ARGS_LEVEL (args, level)) > 0 + && !template_placeholder_p (t)) { arg = TMPL_ARG (args, level, idx); seems to work better. Makes sense. Here's a patch that implements that. I reckon it's good to have both workarounds in place because the tf_partial workaround is necessary to accept class-deduction93a.C below, and the tsubst workaround is necessary to accept class-deduction-92b.C below. Whoops, forgot to git-add class-deduction93a.C: -- >8 -- Subject: [PATCH] c++: CTAD within alias template [PR91911] In the first testcase below, during parsing of the alias template ConstSpanType, transparency of alias template specializations means we replace SpanType with SpanType's substituted definition. But this substitution lowers the level of the CTAD placeholder for span{T()} from 2 to 1, and so the
Re: [PATCH, rs6000] Add a combine pattern for CA minus one [PR95737]
On Wed, Jan 19, 2022 at 6:12 PM HAO CHEN GUI wrote: > > > > On 19/1/2022 下午 3:52, Andrew Pinski wrote: > > On Tue, Jan 18, 2022 at 11:13 PM HAO CHEN GUI via Gcc-patches > > wrote: > >> > >> Hi, > >>This patch adds a combine pattern for "CA minus one". As CA only has two > >> values (0 or 1), we could convert following pattern > >> (sign_extend:DI (plus:SI (reg:SI 98 ca) > >> (const_int -1 [0x] > >> to > >>(plus:DI (reg:DI 98 ca) > >> (const_int -1 [0x]))) > >> With this patch, it eliminates one unnecessary sign extend. Also in > >> rs6000, > >> regclass of CA register is set to NO_REGS. So CA is not in hard register > >> set > >> and it can't match register_operand. The patch changes it to any_operand. > >> > >> Bootstrapped and tested on powerpc64-linux BE and LE with no > >> regressions. > >> Is this okay for trunk? Any recommendations? Thanks a lot. > >> > >> ChangeLog > >> 2022-01-19 Haochen Gui > >> > >> gcc/ > >> * config/rs6000/predicates.md (ca_operand): Match any_operand as CA > >> register is not in hard register set. > >> * config/rs6000/rs6000.md (extenddi_ca_minus_one): Define. > >> > >> gcc/testsuite/ > >> * gcc.target/powerpc/pr95737.c: New. > >> > >> > >> patch.diff > >> diff --git a/gcc/config/rs6000/predicates.md > >> b/gcc/config/rs6000/predicates.md > >> index c65dfb91f3d..cd2ae1dc8e0 100644 > >> --- a/gcc/config/rs6000/predicates.md > >> +++ b/gcc/config/rs6000/predicates.md > >> @@ -188,7 +188,7 @@ (define_predicate "vlogical_operand" > >> > >> ;; Return 1 if op is the carry register. 
> >> (define_predicate "ca_operand" > >> - (match_operand 0 "register_operand") > >> + (match_operand 0 "any_operand") > >> { > >>if (SUBREG_P (op)) > >> op = SUBREG_REG (op); > >> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > >> index 6ecb0bd6142..f1b09aad3b5 100644 > >> --- a/gcc/config/rs6000/rs6000.md > >> +++ b/gcc/config/rs6000/rs6000.md > >> @@ -2358,6 +2358,21 @@ (define_insn "subf3_carry_in_xx" > >>"subfe %0,%0,%0" > >>[(set_attr "type" "add")]) > >> > >> +(define_insn_and_split "*extenddi_ca_minus_one" > >> + [(set (match_operand:DI 0 "gpc_reg_operand") > >> + (sign_extend:DI (plus:SI (match_operand:SI 1 "ca_operand") > >> +(const_int -1] > >> + "" > >> + "#" > >> + "" > >> + [(parallel [(set (match_dup 0) > >> + (plus:DI (match_dup 2) > >> + (const_int -1))) > >> + (clobber (match_dup 2))])] > >> +{ > >> + operands[2] = copy_rtx (operands[1]); > >> + PUT_MODE (operands[2], DImode); > >> +}) > > > > There are a few things missing I think for this to be correct. > > I think it should be: > > (define_insn_and_split "*extenddi_ca_minus_one" > > [(set (match_operand:DI 0 "gpc_reg_operand" "=r") > >(sign_extend:DI (plus:SI (reg:SI CA_REGNO) > > (const_int -1] > > "" > > "#" > > "&& reload_completed" > > [(parallel [(set (match_dup 0) > > (plus:DI (reg:DI CA_REGNO) > >(const_int -1))) > > (clobber (reg:DI CA_REGNO))])] > > {}) > > > > There is no reason to change ca_operand either since > > subf3_carry_in_xx already hard codes the CA_REGNO too; you can > Yes, we can directly use CA_REGNO. It makes the pattern compact. > But why it needs reload_completed? Could you explain it? Actually you are right, there is no reason for the reload_completed any more. It was definitely needed before when you were doing PUT_MODE as I didn't trust how ca_operand would have caught only the CA_REGNO register. Thanks, Andrew > > Thanks. > Gui Haochen > > > just use it directly like above. > > > > Sorry for the incorrect whitespace formatting though. 
> > > > Thanks, > > Andrew Pinski > > > >> > >> (define_insn "@neg2" > >>[(set (match_operand:GPR 0 "gpc_reg_operand" "=r") > >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c > >> b/gcc/testsuite/gcc.target/powerpc/pr95737.c > >> new file mode 100644 > >> index 000..94320f23423 > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c > >> @@ -0,0 +1,10 @@ > >> +/* PR target/95737 */ > >> +/* { dg-do compile { target lp64 } } */ > >> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ > >> +/* { dg-final { scan-assembler-not {\mextsw\M} } } */ > >> + > >> + > >> +unsigned long long negativeLessThan (unsigned long long a, unsigned long > >> long b) > >> +{ > >> + return -(a < b); > >> +}
Re: [PATCH, rs6000] Add a combine pattern for CA minus one [PR95737]
On 19/1/2022 下午 3:52, Andrew Pinski wrote: > On Tue, Jan 18, 2022 at 11:13 PM HAO CHEN GUI via Gcc-patches > wrote: >> >> Hi, >>This patch adds a combine pattern for "CA minus one". As CA only has two >> values (0 or 1), we could convert following pattern >> (sign_extend:DI (plus:SI (reg:SI 98 ca) >> (const_int -1 [0x] >> to >>(plus:DI (reg:DI 98 ca) >> (const_int -1 [0x]))) >> With this patch, it eliminates one unnecessary sign extend. Also in >> rs6000, >> regclass of CA register is set to NO_REGS. So CA is not in hard register set >> and it can't match register_operand. The patch changes it to any_operand. >> >> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. >> Is this okay for trunk? Any recommendations? Thanks a lot. >> >> ChangeLog >> 2022-01-19 Haochen Gui >> >> gcc/ >> * config/rs6000/predicates.md (ca_operand): Match any_operand as CA >> register is not in hard register set. >> * config/rs6000/rs6000.md (extenddi_ca_minus_one): Define. >> >> gcc/testsuite/ >> * gcc.target/powerpc/pr95737.c: New. >> >> >> patch.diff >> diff --git a/gcc/config/rs6000/predicates.md >> b/gcc/config/rs6000/predicates.md >> index c65dfb91f3d..cd2ae1dc8e0 100644 >> --- a/gcc/config/rs6000/predicates.md >> +++ b/gcc/config/rs6000/predicates.md >> @@ -188,7 +188,7 @@ (define_predicate "vlogical_operand" >> >> ;; Return 1 if op is the carry register. 
>> (define_predicate "ca_operand" >> - (match_operand 0 "register_operand") >> + (match_operand 0 "any_operand") >> { >>if (SUBREG_P (op)) >> op = SUBREG_REG (op); >> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >> index 6ecb0bd6142..f1b09aad3b5 100644 >> --- a/gcc/config/rs6000/rs6000.md >> +++ b/gcc/config/rs6000/rs6000.md >> @@ -2358,6 +2358,21 @@ (define_insn "subf3_carry_in_xx" >>"subfe %0,%0,%0" >>[(set_attr "type" "add")]) >> >> +(define_insn_and_split "*extenddi_ca_minus_one" >> + [(set (match_operand:DI 0 "gpc_reg_operand") >> + (sign_extend:DI (plus:SI (match_operand:SI 1 "ca_operand") >> +(const_int -1] >> + "" >> + "#" >> + "" >> + [(parallel [(set (match_dup 0) >> + (plus:DI (match_dup 2) >> + (const_int -1))) >> + (clobber (match_dup 2))])] >> +{ >> + operands[2] = copy_rtx (operands[1]); >> + PUT_MODE (operands[2], DImode); >> +}) > > There are a few things missing I think for this to be correct. > I think it should be: > (define_insn_and_split "*extenddi_ca_minus_one" > [(set (match_operand:DI 0 "gpc_reg_operand" "=r") >(sign_extend:DI (plus:SI (reg:SI CA_REGNO) > (const_int -1] > "" > "#" > "&& reload_completed" > [(parallel [(set (match_dup 0) > (plus:DI (reg:DI CA_REGNO) >(const_int -1))) > (clobber (reg:DI CA_REGNO))])] > {}) > > There is no reason to change ca_operand either since > subf3_carry_in_xx already hard codes the CA_REGNO too; you can Yes, we can directly use CA_REGNO. It makes the pattern compact. But why it needs reload_completed? Could you explain it? Thanks. Gui Haochen > just use it directly like above. > > Sorry for the incorrect whitespace formatting though. 
> > Thanks, > Andrew Pinski > >> >> (define_insn "@neg2" >>[(set (match_operand:GPR 0 "gpc_reg_operand" "=r") >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c >> b/gcc/testsuite/gcc.target/powerpc/pr95737.c >> new file mode 100644 >> index 000..94320f23423 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c >> @@ -0,0 +1,10 @@ >> +/* PR target/95737 */ >> +/* { dg-do compile { target lp64 } } */ >> +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ >> +/* { dg-final { scan-assembler-not {\mextsw\M} } } */ >> + >> + >> +unsigned long long negativeLessThan (unsigned long long a, unsigned long >> long b) >> +{ >> + return -(a < b); >> +}
[PATCH] constrain conservative string lengths to array sizes [PR104119]
The attached patch suppresses a class of unexpected -Wformat-overflow (and -truncation) warnings introduced as a result of better range info with the integration of the strlen pass with Ranger. The sprintf warning code relies on the strlen pass data to determine the lengths of string arguments to %s directives. When the data for a string are present, such as after a strlen call, the length can be either a constant or, in the case of interest, a range (including [N, PTRDIFF_MAX - 2] for a string of unbounded length). When absent because no string call has been seen yet, the string length is considered to be bounded by the size of the array it's stored in. This constrains the maximum number of bytes output by the %s directive and reduces false positives. The problem this patch addresses is that in the interesting case there is no logic similar to the last ("no data") case, and so the maximum number of bytes can be in excess of the size of the array. The patch does it by computing the size of the object (or member) in which the string is stored and using its size minus 1 as the upper bound on the length. To do that, I had to adjust the APIs to pass in the pointer_query instance of the range_query. The meat of the change is in the new get_maxbound() function. There might be opportunities to do better still. I'll try to look into them if I still have time. Tested on x86_64-linux. Martin Constrain conservative string lengths to array sizes [PR104119]. Resolves: PR tree-optimization/104119 - unexpected -Wformat-overflow after strlen in ILP32 since Ranger integration gcc/ChangeLog: PR tree-optimization/104119 * gimple-ssa-sprintf.cc (struct directive): Change argument type. (format_none): Same. (format_percent): Same. (format_integer): Same. (format_floating): Same. (get_string_length): Same. (format_character): Same. (format_string): Same. (format_plain): Same. (format_directive): Same. (compute_format_length): Same. (handle_printf_call): Same. 
* tree-ssa-strlen.cc (get_range_strlen_dynamic): Same. Call get_maxbound. (get_range_strlen_phi): Same. (get_maxbound): New function. (strlen_pass::get_len_or_size): Adjust to parameter change. * tree-ssa-strlen.h (get_range_strlen_dynamic): Change argument type. gcc/testsuite/ChangeLog: PR tree-optimization/104119 * gcc.dg/tree-ssa/builtin-sprintf-warn-29.c: New test. diff --git a/gcc/gimple-ssa-sprintf.cc b/gcc/gimple-ssa-sprintf.cc index 98ab563a01b..268b1eed427 100644 --- a/gcc/gimple-ssa-sprintf.cc +++ b/gcc/gimple-ssa-sprintf.cc @@ -600,7 +600,7 @@ struct directive /* Format conversion function that given a directive and an argument returns the formatting result. */ - fmtresult (*fmtfunc) (const directive &, tree, range_query *); + fmtresult (*fmtfunc) (const directive &, tree, pointer_query &); /* Return True when the format flag CHR has been used. */ bool get_flag (char chr) const @@ -968,7 +968,7 @@ directive::set_precision (tree arg, range_query *query) /* Return the result of formatting a no-op directive (such as '%n'). */ static fmtresult -format_none (const directive &, tree, range_query *) +format_none (const directive &, tree, pointer_query &) { fmtresult res (0); return res; @@ -977,7 +977,7 @@ format_none (const directive &, tree, range_query *) /* Return the result of formatting the '%%' directive. */ static fmtresult -format_percent (const directive &, tree, range_query *) +format_percent (const directive &, tree, pointer_query &) { fmtresult res (1); return res; @@ -1199,7 +1199,7 @@ adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax) used when the directive argument or its value isn't known. 
*/ static fmtresult -format_integer (const directive , tree arg, range_query *query) +format_integer (const directive , tree arg, pointer_query _qry) { tree intmax_type_node; tree uintmax_type_node; @@ -1383,7 +1383,7 @@ format_integer (const directive , tree arg, range_query *query) /* Try to determine the range of values of the integer argument (range information is not available for pointers). */ value_range vr; - query->range_of_expr (vr, arg, dir.info->callstmt); + ptr_qry.rvals->range_of_expr (vr, arg, dir.info->callstmt); if (!vr.varying_p () && !vr.undefined_p ()) { @@ -1414,7 +1414,7 @@ format_integer (const directive , tree arg, range_query *query) if (code == INTEGER_CST) { arg = gimple_assign_rhs1 (def); - return format_integer (dir, arg, query); + return format_integer (dir, arg, ptr_qry); } if (code == NOP_EXPR) @@ -1459,16 +1459,16 @@ format_integer (const directive , tree arg, range_query *query) /* For unsigned conversions/directives or signed when the minimum is positive, use the minimum and maximum to compute the shortest and longest output, respectively. */ - res.range.min = format_integer (dir, argmin, query).range.min; -
Re: Catch 'GIMPLE_DEBUG' misbehavior in OpenACC 'kernels' decomposition [PR100400, PR103836, PR104061] (was: Decompose OpenACC 'kernels' constructs into parts, a sequence of compute constructs)
On Wed, Jan 19, 2022 at 11:29:18PM +0100, Thomas Schwinge wrote: > (The pass is still disabled by default, by the way.) > > We've found that 'gcc/omp-oacc-kernels-decompose.cc' is currently not at > all considerate of 'GIMPLE_DEBUG' statements -- and it's not always > straight forward how to handle these (not rocket science either; but > needs proper understanding and testing). The general rule is that debug stmts shouldn't affect code generation decisions, so when deciding what to optimize/how, they should be ignored, and during actual transformation adjusted or worst case reset as needed. > Actually fixing it is a separate task, but it seems prudent to at least > catch it, and document via a few test cases. OK to push > "Catch 'GIMPLE_DEBUG' misbehavior in OpenACC 'kernels' decomposition > [PR100400, PR103836, PR104061]", see attached? > --- a/gcc/omp-oacc-kernels-decompose.cc > +++ b/gcc/omp-oacc-kernels-decompose.cc > @@ -1255,6 +1255,16 @@ decompose_kernels_region_body (gimple *kernels_region, > tree kernels_clauses) >gsi_next (_n); > >gimple *stmt = gsi_stmt (gsi); > + if (gimple_code (stmt) == GIMPLE_DEBUG) > + { > + if (flag_compare_debug_opt || flag_compare_debug) > + /* Let the usual '-fcompare-debug' analysis bail out, as > +necessary. */ > + ; > + else > + sorry_at (loc, "%qs not yet supported", > + gimple_code_name[gimple_code (stmt)]); > + } This is wrong. It shouldn't be dependent on flag_compare_debug* options, those are just debugging aids to verify that -g/-g0 don't affect code generation. With the above you'd pretend they don't, but they actually would (with -g you'd get sorry, without it it would compile fine). 
If this code is analysing whether the kernels region body should be decomposed or not, it should be if (is_gimple_debug (stmt)) continue; or whatever else to just ignore them (in some opts already during analysis phase we remember they are present and something about them, but not in a way that would actually affect the code generation decisions). And then when actually transforming it, it depends on what transformations are done to the variables/values referenced in the debug stmts. gimple_debug_bind_reset_value (stmt); update_stmt (stmt); is what resets them and can be used as last resort, it will keep saying that it describes some var, but will say that the var is optimized out. Jakub
Re: [PATCH v2 1/2] add -Wuse-after-free
On 1/11/2022 3:40 PM, Jason Merrill wrote: On 11/30/21 17:32, Martin Sebor via Gcc-patches wrote: Attached is a revised patch with the following changes based on your comments: 1) Set and use statement uids to determine which statement precedes which in the same basic block. 2) Avoid testing flag_isolate_erroneous_paths_dereference. 3) Use post-dominance to decide whether to use the "maybe" phrasing vs a definite form. David raised (and in our offline discussion today reiterated) an objection to the default setting of the option being the strictest. I have not changed that in this revision. See my rationale for this choice in my reply below: https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583176.html In the latest C2x draft I see in the list of undefined behavior "The value of a pointer that refers to space deallocated by a call to the free or realloc function is used (7.22.3)." So the case that would be technically undefined would be comparing the reallocated pointer to the old pointer which has been deallocated. The C++ draft is more nuanced: it says, "When the end of the duration of a region of storage is reached, the values of all pointers representing the address of any part of that region of storage become invalid pointer values (6.8.3). Indirection through an invalid pointer value and passing an invalid pointer value to a deallocation function have undefined behavior. Any other use of an invalid pointer value has implementation-defined behavior." So the case above is implementation-defined in C++, not undefined. Let's put =2 in -Wall for now. With that change, this and the -Wdangling-pointer patch are OK on Friday afternoon if there are no other comments before then. Thanks for picking this up. I've been busier than expected the last several weeks. jeff
Re: [PATCH] c++: CTAD within alias template [PR91911]
On Wed, 19 Jan 2022, Jason Merrill wrote: > On 1/3/22 10:24, Patrick Palka wrote: > > On Wed, 22 Dec 2021, Jason Merrill wrote: > > > > > On 12/21/21 14:08, Patrick Palka wrote: > > > > On Tue, Dec 21, 2021 at 2:03 PM Patrick Palka wrote: > > > > > > > > > > On Wed, Jun 30, 2021 at 4:23 PM Jason Merrill > > > > > wrote: > > > > > > > > > > > > On 6/30/21 4:18 PM, Patrick Palka wrote: > > > > > > > On Wed, Jun 30, 2021 at 3:51 PM Jason Merrill > > > > > > > wrote: > > > > > > > > > > > > > > > > On 6/30/21 11:58 AM, Patrick Palka wrote: > > > > > > > > > On Wed, 30 Jun 2021, Patrick Palka wrote: > > > > > > > > > > > > > > > > > > > On Fri, 25 Jun 2021, Jason Merrill wrote: > > > > > > > > > > > > > > > > > > > > > On 6/25/21 1:11 PM, Patrick Palka wrote: > > > > > > > > > > > > On Fri, 25 Jun 2021, Jason Merrill wrote: > > > > > > > > > > > > > > > > > > > > > > > > > On 6/24/21 4:45 PM, Patrick Palka wrote: > > > > > > > > > > > > > > In the first testcase below, during parsing of the > > > > > > > > > > > > > > alias > > > > > > > > > > > > > > template > > > > > > > > > > > > > > ConstSpanType, transparency of alias template > > > > > > > > > > > > > > specializations means we > > > > > > > > > > > > > > replace SpanType with SpanType's substituted > > > > > > > > > > > > > > definition. But this > > > > > > > > > > > > > > substitution lowers the level of the CTAD > > > > > > > > > > > > > > placeholder > > > > > > > > > > > > > > for span(T()) from > > > > > > > > > > > > > > 2 to 1, and so the later instantiantion of > > > > > > > > > > > > > > ConstSpanType > > > > > > > > > > > > > > erroneously substitutes this CTAD placeholder with > > > > > > > > > > > > > > the > > > > > > > > > > > > > > template argument > > > > > > > > > > > > > > at level 1 index 0, i.e. with int, before we get a > > > > > > > > > > > > > > chance to perform the > > > > > > > > > > > > > > CTAD. 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > In light of this, it seems we should avoid level > > > > > > > > > > > > > > lowering when > > > > > > > > > > > > > > substituting through through the type-id of a > > > > > > > > > > > > > > dependent > > > > > > > > > > > > > > alias template > > > > > > > > > > > > > > specialization. To that end this patch makes > > > > > > > > > > > > > > lookup_template_class_1 > > > > > > > > > > > > > > pass tf_partial to tsubst in this situation. > > > > > > > > > > > > > > > > > > > > > > > > > > This makes sense, but what happens if SpanType is a > > > > > > > > > > > > > member > > > > > > > > > > > > > template, so > > > > > > > > > > > > > that > > > > > > > > > > > > > the levels of it and ConstSpanType don't match? Or > > > > > > > > > > > > > the > > > > > > > > > > > > > other way around? > > > > > > > > > > > > > > > > > > > > > > > > If SpanType is a member template of say the class > > > > > > > > > > > > template A (and > > > > > > > > > > > > thus its level is greater than ConstSpanType): > > > > > > > > > > > > > > > > > > > > > > > >template > > > > > > > > > > > >struct A { > > > > > > > > > > > > template > > > > > > > > > > > > using SpanType = decltype(span(T())); > > > > > > > > > > > >}; > > > > > > > > > > > > > > > > > > > > > > > >template > > > > > > > > > > > >using ConstSpanType = span > > > > > > > > > > > A::SpanType::value_type>; > > > > > > > > > > > > > > > > > > > > > > > >using type = ConstSpanType; > > > > > > > > > > > > > > > > > > > > > > > > then this case luckily works even without the patch > > > > > > > > > > > > because > > > > > > > > > > > > instantiate_class_template now reuses the specialization > > > > > > > > > > > > A::SpanType > > > > > > > > > > > > that was formed earlier during instantiation of A, > > > > > > > > > > > > where we > > > > > > > > > > > > substitute only a single level of template arguments, so > > > > > > > > > > > > the > > > > > > > > > > > > 
level of > > > > > > > > > > > > the CTAD placeholder inside the defining-type-id of this > > > > > > > > > > > > specialization > > > > > > > > > > > > dropped from 3 to 2, so still more than the level of > > > > > > > > > > > > ConstSpanType. > > > > > > > > > > > > > > > > > > > > > > > > This luck is short-lived though, because if we replace > > > > > > > > > > > > A::SpanType with say A::SpanType > > > > > > > > > > > > then > > > > > > > > > > > > the testcase > > > > > > > > > > > > breaks again (without the patch) because we no longer > > > > > > > > > > > > can > > > > > > > > > > > > reuse that > > > > > > > > > > > > specialization, so we instead form it on the spot by > > > > > > > > > > > > substituting two > > > > > > > > > > > > levels of template arguments (U=int,T=T) into the > > > > > > > > > > > > defining-type-id, > > > > > > > > > > > > causing the level of the placeholder to drop to 1. I > > > > > > > > > > > > think > > > > > > > > > > > > the patch > > > > > > >
Catch 'GIMPLE_DEBUG' misbehavior in OpenACC 'kernels' decomposition [PR100400, PR103836, PR104061] (was: Decompose OpenACC 'kernels' constructs into parts, a sequence of compute constructs)
Hi! On 2020-11-13T23:22:30+0100, I wrote: > I've pushed to master branch [...] commit > e898ce7997733c29dcab9c3c62ca102c7f9fa6eb "Decompose OpenACC 'kernels' > constructs into parts, a sequence of compute constructs", see attached. > > On 2019-02-01T00:59:30+0100, I wrote: >> There's more work to be done there, and we're aware of a number of TODO >> items, but nevertheless: it's a good first step. > > That's still the case... :-) (The pass is still disabled by default, by the way.) We've found that 'gcc/omp-oacc-kernels-decompose.cc' is currently not at all considerate of 'GIMPLE_DEBUG' statements -- and it's not always straight forward how to handle these (not rocket science either; but needs proper understanding and testing). Actually fixing it is a separate task, but it seems prudent to at least catch it, and document via a few test cases. OK to push "Catch 'GIMPLE_DEBUG' misbehavior in OpenACC 'kernels' decomposition [PR100400, PR103836, PR104061]", see attached? Grüße Thomas - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955 >From 568808ef7ccc97ebeae90bc7cb1aba6bd7659b24 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Wed, 19 Jan 2022 14:04:42 +0100 Subject: [PATCH] Catch 'GIMPLE_DEBUG' misbehavior in OpenACC 'kernels' decomposition [PR100400, PR103836, PR104061] Actually fixing it is a separate task, but it seems prudent to at least catch it, and document via a few test cases. gcc/ PR middle-end/100400 PR middle-end/103836 PR middle-end/104061 * omp-oacc-kernels-decompose.cc (decompose_kernels_region_body): Catch 'GIMPLE_DEBUG'. gcc/testsuite/ PR middle-end/100400 PR middle-end/103836 PR middle-end/104061 * c-c++-common/goacc/kernels-decompose-pr100400-1-1.c: New. * c-c++-common/goacc/kernels-decompose-pr100400-1-2.c: New. 
* c-c++-common/goacc/kernels-decompose-pr100400-1-3.c: New. * c-c++-common/goacc/kernels-decompose-pr100400-1-4.c: New. * c-c++-common/goacc/kernels-decompose-pr103836-1-1.c: New. * c-c++-common/goacc/kernels-decompose-pr103836-1-2.c: New. * c-c++-common/goacc/kernels-decompose-pr103836-1-3.c: New. * c-c++-common/goacc/kernels-decompose-pr103836-1-4.c: New. * c-c++-common/goacc/kernels-decompose-pr104061-1-1.c: New. * c-c++-common/goacc/kernels-decompose-pr104061-1-2.c: New. * c-c++-common/goacc/kernels-decompose-pr104061-1-3.c: New. * c-c++-common/goacc/kernels-decompose-pr104061-1-4.c: New. --- gcc/omp-oacc-kernels-decompose.cc | 10 + .../goacc/kernels-decompose-pr100400-1-1.c| 33 ++ .../goacc/kernels-decompose-pr100400-1-2.c| 40 + .../goacc/kernels-decompose-pr100400-1-3.c| 42 ++ .../goacc/kernels-decompose-pr100400-1-4.c| 40 + .../goacc/kernels-decompose-pr103836-1-1.c| 26 +++ .../goacc/kernels-decompose-pr103836-1-2.c| 29 + .../goacc/kernels-decompose-pr103836-1-3.c| 30 + .../goacc/kernels-decompose-pr103836-1-4.c| 30 + .../goacc/kernels-decompose-pr104061-1-1.c| 30 + .../goacc/kernels-decompose-pr104061-1-2.c| 33 ++ .../goacc/kernels-decompose-pr104061-1-3.c| 43 +++ .../goacc/kernels-decompose-pr104061-1-4.c| 41 ++ 13 files changed, 427 insertions(+) create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr100400-1-1.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr100400-1-2.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr100400-1-3.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr100400-1-4.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr103836-1-1.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr103836-1-2.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr103836-1-3.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr103836-1-4.c create mode 100644 
gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr104061-1-1.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr104061-1-2.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr104061-1-3.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr104061-1-4.c diff --git a/gcc/omp-oacc-kernels-decompose.cc b/gcc/omp-oacc-kernels-decompose.cc index 21872db3ed3..98eafdbe3a1 100644 --- a/gcc/omp-oacc-kernels-decompose.cc +++ b/gcc/omp-oacc-kernels-decompose.cc @@ -1255,6 +1255,16 @@ decompose_kernels_region_body (gimple *kernels_region, tree kernels_clauses) gsi_next (_n); gimple *stmt = gsi_stmt (gsi); + if (gimple_code (stmt) == GIMPLE_DEBUG) + { + if
Re: [PATCH] Fortran: Fix scope for OMP AFFINITY clause iterator variables [PR103695]
Hi Sandra, This patch is for PR103695, marked as a P1 regression. OK to check in? I'm not an OpenMP expert, but this looks straightforward enough. I assume you ran a regression-test? OK if that is the case. Thanks for the patch! Best regards Thomas
[PATCH] Fortran: Fix scope for OMP AFFINITY clause iterator variables [PR103695]
This patch is for PR103695, marked as a P1 regression. OK to check in? -Sandra commit 21f8ac540b73e3838b63924e3c7e6c2ad25568ee Author: Sandra Loosemore Date: Wed Jan 19 12:50:49 2022 -0800 Fortran: Fix scope for OMP AFFINITY clause iterator variables [PR103695] gfc_finish_var_decl was confused by the undocumented overloading of the proc_name field in struct gfc_namespace to contain iterator variables for the OpenMP AFFINITY clause, causing it to insert the decls in the wrong scope. This patch adds a new distinct field to hold these variables. 2022-01-19 Sandra Loosemore PR fortran/103695 gcc/fortran * gfortran.h (struct gfc_namespace) Add omp_affinity_iterator field. * dump-parse-tree.cc (show_iterator): Use it. * openmp.cc (gfc_match_iterator): Likewise. (resolve_omp_clauses): Likewise. * trans-decl.cc (gfc_finish_var_decl): Likewise. * trans-openmp.cc (handle_iterator): Likewise. gcc/testsuite/ * gfortran.dg/gomp/affinity-clause-3.f90: Adjust pattern. * gfortran.dg/gomp/pr103695.f90: New. diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc index a618ae2..3112cae 100644 --- a/gcc/fortran/dump-parse-tree.cc +++ b/gcc/fortran/dump-parse-tree.cc @@ -1302,10 +1302,10 @@ show_code (int level, gfc_code *c) static void show_iterator (gfc_namespace *ns) { - for (gfc_symbol *sym = ns->proc_name; sym; sym = sym->tlink) + for (gfc_symbol *sym = ns->omp_affinity_iterators; sym; sym = sym->tlink) { gfc_constructor *c; - if (sym != ns->proc_name) + if (sym != ns->omp_affinity_iterators) fputc (',', dumpfile); fputs (sym->name, dumpfile); fputc ('=', dumpfile); diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 00a558a..993879f 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -2107,6 +2107,9 @@ typedef struct gfc_namespace /* !$ACC ROUTINE clauses. */ gfc_omp_clauses *oacc_routine_clauses; + /* !$ACC TASK AFFINITY iterator symbols. */ + gfc_symbol *omp_affinity_iterators; + /* !$ACC ROUTINE names. 
*/ gfc_oacc_routine_name *oacc_routine_names; diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc index 9b73b9f..073e5a1 100644 --- a/gcc/fortran/openmp.cc +++ b/gcc/fortran/openmp.cc @@ -1123,7 +1123,7 @@ gfc_match_iterator (gfc_namespace **ns, bool permit_var) if (last) last->tlink = sym; else - (*ns)->proc_name = sym; + (*ns)->omp_affinity_iterators = sym; last = sym; sym->declared_at = prev_loc; sym->ts = ts; @@ -6832,8 +6832,8 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, && n->u2.ns && !n->u2.ns->resolved) { n->u2.ns->resolved = 1; - for (gfc_symbol *sym = n->u2.ns->proc_name; sym; - sym = sym->tlink) + for (gfc_symbol *sym = n->u2.ns->omp_affinity_iterators; + sym; sym = sym->tlink) { gfc_constructor *c; c = gfc_constructor_first (sym->value->value.constructor); diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc index 1112ca9..6493cc2 100644 --- a/gcc/fortran/trans-decl.cc +++ b/gcc/fortran/trans-decl.cc @@ -647,6 +647,9 @@ gfc_finish_var_decl (tree decl, gfc_symbol * sym) && sym->ns->proc_name->attr.flavor == FL_LABEL) /* This is a BLOCK construct. */ add_decl_as_local (decl); + else if (sym->ns->omp_affinity_iterators) + /* This is a block-local iterator. 
*/ + add_decl_as_local (decl); else gfc_add_decl_to_parent_function (decl); } diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 9eabf68..d5a6b2d 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -2483,7 +2483,7 @@ static tree handle_iterator (gfc_namespace *ns, stmtblock_t *iter_block, tree block) { tree list = NULL_TREE; - for (gfc_symbol *sym = ns->proc_name; sym; sym = sym->tlink) + for (gfc_symbol *sym = ns->omp_affinity_iterators; sym; sym = sym->tlink) { gfc_constructor *c; gfc_se se; diff --git a/gcc/testsuite/gfortran.dg/gomp/affinity-clause-3.f90 b/gcc/testsuite/gfortran.dg/gomp/affinity-clause-3.f90 index 3fd39fe..eebe4dd 100644 --- a/gcc/testsuite/gfortran.dg/gomp/affinity-clause-3.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/affinity-clause-3.f90 @@ -11,4 +11,4 @@ subroutine foo !$omp end task end ! { dg-final { scan-tree-dump-times "= ibar \\(\\." 3 "gimple" } } -! { dg-final { scan-tree-dump-times "= ibar \\(j\\." 1 "gimple" } } +! { dg-final { scan-tree-dump-times "= ibar \\(" 1 "gimple" } } diff --git a/gcc/testsuite/gfortran.dg/gomp/pr103695.f90 b/gcc/testsuite/gfortran.dg/gomp/pr103695.f90 new file mode 100644 index 000..cc9764e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/pr103695.f90 @@ -0,0 +1,18 @@ +! This test case used to ICE in verify_ssa due to the iterator variable j +! incorrectly being inserted into program scope. + +program p +
Re: [PATCH] mips: Improved RTL representation of wsbh/dsbh/dshd
Hi Roger, > This patch to the mips backend updates the representations used > internally for MIPS' wsbh, dsbh and dshd instructions. These were > previously described using an UNSPEC rtx, which prevents simplification > at the RTL level. In addition to now being able to eliminate rotate > instructions before/after wsbh, allowing a wsbh to be emitted without > a backend builtin, these new representations also allow dsbh and dshd > to be synthesized from standard C/C++ vector idioms. I came across your submission and while it is not a proper review I've noticed a couple of issues as below. > diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md > index 455b9b8..21364d6 100644 > --- a/gcc/config/mips/mips.md > +++ b/gcc/config/mips/mips.md [...] > + > +;; Non-canonical variant of wsbh > +(define_insn "wsbh_2" This insn is only ever matched by its RTL pattern and is nowhere referred to by its name, but giving it a callable one makes extra code produced to make a call possible (via `gen_wsbh_2'). Can you prevent useless callable code from being produced by giving the pattern a debug name instead such as `*wsbh_2'? > +;; V4QI form of wsbh > +(define_insn "wsbh_v4qi" Likewise here and throughout for other new insns. Please note that you need a full stop at the end of sentences in comments to comply with the GNU coding style. > --- /dev/null > +++ b/gcc/testsuite/gcc.target/mips/dsbh-v8qi.c > @@ -0,0 +1,12 @@ > +/* { dg-options "isa_rev>=2 -mgp64" } */ > + > +typedef char v8qi __attribute__((vector_size (8))); > + > +long long foo(long long x) > +{ > + v8qi t = (v8qi)x; > + t = __builtin_shufflevector (t, t, 1, 0, 3, 2, 5, 4, 7, 6); > + return (long long)t; > +} > + > +/* { dg-final { scan-assembler "\tdsbh\t" } } */ Your new test cases do not follow the GNU coding style, can you please run them through `indent -gnu'? Maciej
Ping^5: [PATCH, rs6000 V2] rotate and mask constants [PR94393]
Ping. I'll note that I recently discovered that this patch also fixes PR93176 and PR97042. -Pat On 11/22/21 1:38 PM, Pat Haugen via Gcc-patches wrote: > Updated version of the patch. Changes made from original are updated > commentary to hopefully aid readability, no functional changes. > > > Implement more two insn constants. rotate_and_mask_constant covers > 64-bit constants that can be formed by rotating a 16-bit signed > constant, rotating a 16-bit signed constant masked on left or right > (rldicl and rldicr), rotating a 16-bit signed constant masked by > rldic, and unusual "lis; rldicl" and "lis; rldicr" patterns. All the > values possible for DImode rs6000_is_valid_and_mask are covered. > > Bootstrapped and regression tested on powerpc64(32/64) and powerpc64le. > Ok for master? > > -Pat > > > 2021-11-22 Alan Modra > Pat Haugen > > PR 94393 > gcc/ > * config/rs6000/rs6000.c (rotate_di, is_rotate_positive_constant, > is_rotate_negative_constant, rotate_and_mask_constant): New functions. > (num_insns_constant_multi, rs6000_emit_set_long_const): Use it here. > * config/rs6000/rs6000.md (*movdi_internal64+1 splitter): Delete. > gcc/testsuite/ > * gcc.target/powerpc/rot_cst.h, > * gcc.target/powerpc/rot_cst1.c, > * gcc.target/powerpc/rot_cst2.c: New tests.
[PATCH] libstdc++: Simplify build targets for debug library
Posting for review now, but I will wait until stage 1 to push this. This rewrites the stamp-debug and build-debug targets in src/Makefile so that each generated Makefile in the debug/$(SUBDIRS) directories is a make target, instead of being created by a loop in the stamp-debug recipe. The final adjustments to debug/Makefile are done as part of the stamp-debug target instead of the build-debug target. The advantage is that each $(SUBDIRS)/debug/Makefile now has the corresponding $(SUBDIRS)/Makefile as a prerequisite, so they will be regenerated if needed. Generating those can also be parallelized by make, although those steps are very fast so that doesn't really matter. This also removes the duplication in the stamp-debug recipe, which was using exactly the same sed command for debug/Makefile and each debug/$(SUBDIRS)/Makefile. That is done by adding "." to the list of subdirectories to process. The recipes can also be simplified to use separate shell commands per line, instead of using backslashes to join the whole recipe into a single shell command. Also replace 'echo `date` > stamp-xxx' with just 'date > stamp-xxx' which is equivalent but simpler. libstdc++-v3/ChangeLog: * src/Makefile.am: Simplify debug build targets. * src/Makefile.in: Regenerate. --- libstdc++-v3/src/Makefile.am | 65 ++--- libstdc++-v3/src/Makefile.in | 70 +++- 2 files changed, 56 insertions(+), 79 deletions(-) diff --git a/libstdc++-v3/src/Makefile.am b/libstdc++-v3/src/Makefile.am index 18f57632c3d..2a9f7c157fa 100644 --- a/libstdc++-v3/src/Makefile.am +++ b/libstdc++-v3/src/Makefile.am @@ -30,10 +30,8 @@ endif if ENABLE_BACKTRACE backtrace_dir = libbacktrace -backtrace_supported_h = $(backtrace_dir)/backtrace-supported.h else backtrace_dir = -backtrace_supported_h = endif ## Keep this list sync'd with acinclude.m4:GLIBCXX_CONFIGURE. @@ -362,7 +360,7 @@ libstdc++convenience.la: $(toolexeclib_LTLIBRARIES) if test ! 
-f .libs/libstdc++.a; then \ cp .libs/libstdc++convenience.a .libs/libstdc++.a; \ fi; \ - echo `date` > stamp-libstdc++convenience; + date > stamp-libstdc++convenience; # Added rules. # 1 debug library @@ -378,50 +376,41 @@ CLEAN_DEBUG = endif # Build a debug variant. + # Take care to fix all possibly-relative paths. -debugdir = ${glibcxx_builddir}/src/debug -stamp-debug: Makefile $(foreach dir,$(SUBDIRS),$(dir)/Makefile) - if test ! -d ${debugdir} || test ! -f ${debugdir}/Makefile ; then \ - mkdir -p ${debugdir}; \ - for d in $(SUBDIRS); do mkdir -p ${debugdir}/$$d; done; \ - (cd ${debugdir}; \ - sed -e 's/top_builddir = \.\./top_builddir = ..\/../' \ - -e 's/top_build_prefix = \.\./top_build_prefix = ..\/../' \ - -e 's/srcdir = \.\./srcdir = ..\/../' \ - -e 's/VPATH = \.\./VPATH = ..\/../' \ - -e 's/glibcxx_basedir = \.\./glibcxx_basedir = ..\/../' \ - -e 's/MKDIR_P = \.\./MKDIR_P = ..\/../' \ - < ../Makefile > Makefile ; \ - for d in . $(SUBDIRS); do \ - sed -e 's/top_builddir = \.\./top_builddir = ..\/../' \ - -e 's/top_build_prefix = \.\./top_build_prefix = ..\/../' \ - -e 's/srcdir = \.\./srcdir = ..\/../' \ - -e 's/VPATH = \.\./VPATH = ..\/../' \ - -e 's/glibcxx_basedir = \.\./glibcxx_basedir = ..\/../' \ - -e 's/MKDIR_P = \.\./MKDIR_P = ..\/../' \ - < ../$$d/Makefile > $$d/Makefile ; \ - done) ; \ - fi; \ - echo `date` > stamp-debug; +debug/%/Makefile: %/Makefile + $(MKDIR_P) $(dir $@) + sed -e 's/top_builddir = \.\./top_builddir = ..\/../' \ + -e 's/top_build_prefix = \.\./top_build_prefix = ..\/../' \ + -e 's/srcdir = \.\./srcdir = ..\/../' \ + -e 's/VPATH = \.\./VPATH = ..\/../' \ + -e 's/glibcxx_basedir = \.\./glibcxx_basedir = ..\/../' \ + -e 's/MKDIR_P = \.\./MKDIR_P = ..\/../' \ + < $< > $@ + +stamp-debug: $(foreach dir,. 
$(SUBDIRS),debug/$(dir)/Makefile) + mv debug/Makefile debug/Makefile.tmp + sed -e 's,all-local: all-once,all-local:,' \ + -e 's,install-data-local: install-data-once,install-data-local:,' \ + -e '/vpath/!s,src/c,src/debug/c,' \ + < debug/Makefile.tmp > debug/Makefile + rm -f debug/Makefile.tmp + date > stamp-debug; + +.SECONDARY: $(foreach dir,. $(SUBDIRS),debug/$(dir)/Makefile) if ENABLE_BACKTRACE -${debugdir}/$(backtrace_supported_h): $(backtrace_supported_h) stamp-debug +backtrace_supported_h = $(backtrace_dir)/backtrace-supported.h +debug_backtrace_supported_h = debug/$(backtrace_supported_h) +$(debug_backtrace_supported_h): $(backtrace_supported_h) stamp-debug cp $< $@ -debug_backtrace_supported_h = ${debugdir}/$(backtrace_supported_h) else debug_backtrace_supported_h = endif build-debug: stamp-debug
[committed] libstdc++: Remove -gdwarf-4 from flags for debug library
Tested x86_64-linux, pushed to trunk. Worth backporting to gcc-11 too. The default is -gdwarf-5 now, so this is hurting rather than improving things. libstdc++-v3/ChangeLog: * configure.ac (GLIBCXX_ENABLE_DEBUG_FLAGS): Remove -gdwarf-4 from default flags. * configure: Regenerate. --- libstdc++-v3/configure| 4 ++-- libstdc++-v3/configure.ac | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac index 2a3bc520501..e59bcdb2944 100644 --- a/libstdc++-v3/configure.ac +++ b/libstdc++-v3/configure.ac @@ -172,7 +172,7 @@ GLIBCXX_ENABLE_LONG_LONG([yes]) GLIBCXX_ENABLE_WCHAR_T([yes]) GLIBCXX_ENABLE_C99([yes]) GLIBCXX_ENABLE_CONCEPT_CHECKS([no]) -GLIBCXX_ENABLE_DEBUG_FLAGS(["-gdwarf-4 -g3 -O0 -D_GLIBCXX_ASSERTIONS"]) +GLIBCXX_ENABLE_DEBUG_FLAGS(["-g3 -O0 -D_GLIBCXX_ASSERTIONS"]) GLIBCXX_ENABLE_DEBUG([no]) GLIBCXX_ENABLE_PARALLEL([yes]) GLIBCXX_ENABLE_CXX_FLAGS -- 2.31.1
Re: [PATCH] c++: CTAD within alias template [PR91911]
On 1/3/22 10:24, Patrick Palka wrote: On Wed, 22 Dec 2021, Jason Merrill wrote: On 12/21/21 14:08, Patrick Palka wrote: On Tue, Dec 21, 2021 at 2:03 PM Patrick Palka wrote: On Wed, Jun 30, 2021 at 4:23 PM Jason Merrill wrote: On 6/30/21 4:18 PM, Patrick Palka wrote: On Wed, Jun 30, 2021 at 3:51 PM Jason Merrill wrote: On 6/30/21 11:58 AM, Patrick Palka wrote: On Wed, 30 Jun 2021, Patrick Palka wrote: On Fri, 25 Jun 2021, Jason Merrill wrote: On 6/25/21 1:11 PM, Patrick Palka wrote: On Fri, 25 Jun 2021, Jason Merrill wrote: On 6/24/21 4:45 PM, Patrick Palka wrote: In the first testcase below, during parsing of the alias template ConstSpanType, transparency of alias template specializations means we replace SpanType with SpanType's substituted definition. But this substitution lowers the level of the CTAD placeholder for span(T()) from 2 to 1, and so the later instantiantion of ConstSpanType erroneously substitutes this CTAD placeholder with the template argument at level 1 index 0, i.e. with int, before we get a chance to perform the CTAD. In light of this, it seems we should avoid level lowering when substituting through through the type-id of a dependent alias template specialization. To that end this patch makes lookup_template_class_1 pass tf_partial to tsubst in this situation. This makes sense, but what happens if SpanType is a member template, so that the levels of it and ConstSpanType don't match? Or the other way around? 
If SpanType is a member template of say the class template A (and thus its level is greater than ConstSpanType): template struct A { template using SpanType = decltype(span(T())); }; template using ConstSpanType = span::SpanType::value_type>; using type = ConstSpanType; then this case luckily works even without the patch because instantiate_class_template now reuses the specialization A::SpanType that was formed earlier during instantiation of A, where we substitute only a single level of template arguments, so the level of the CTAD placeholder inside the defining-type-id of this specialization dropped from 3 to 2, so still more than the level of ConstSpanType. This luck is short-lived though, because if we replace A::SpanType with say A::SpanType then the testcase breaks again (without the patch) because we no longer can reuse that specialization, so we instead form it on the spot by substituting two levels of template arguments (U=int,T=T) into the defining-type-id, causing the level of the placeholder to drop to 1. I think the patch causes its level to remain 3 (though I guess it should really be 2). For the other way around, if ConstSpanType is a member template of say the class template B (and thus its level is greater than SpanType): template using SpanType = decltype(span(T())); template struct B { template using ConstSpanType = span::value_type>; }; using type = B::ConstSpanType; then tf_partial doesn't help here at all; we end up substituting 'int' for the CTAD placeholder... What it seems we need is to _increase_ the level of the CTAD placeholder from 2 to 3 during the dependent substitution.. Hmm, rather than messing with tf_partial, which is apparently only a partial solution, maybe we should just make tsubst never substitute a CTAD placeholder -- they should always be resolved from do_class_deduction, and their level doesn't really matter otherwise. 
(But we'd still want to substitute into the CLASS_PLACEHOLDER_TEMPLATE of the placeholder in case it's a template template parm.) Something like: diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 5107bfbf9d1..dead651ed84 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -15552,7 +15550,8 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl) levels = TMPL_ARGS_DEPTH (args); if (level <= levels - && TREE_VEC_LENGTH (TMPL_ARGS_LEVEL (args, level)) > 0) + && TREE_VEC_LENGTH (TMPL_ARGS_LEVEL (args, level)) > 0 + && !template_placeholder_p (t)) { arg = TMPL_ARG (args, level, idx); seems to work better. Makes sense. Here's a patch that implements that. I reckon it's good to have both workarounds in place because the tf_partial workaround is necessary to accept class-deduction93a.C below, and the tsubst workaround is necessary to accept class-deduction-92b.C below. Whoops, forgot to git-add class-deduction93a.C: -- >8 -- Subject: [PATCH] c++: CTAD within alias template [PR91911] In the first testcase below, during parsing of the alias template ConstSpanType, transparency of alias template specializations means we replace SpanType with SpanType's substituted definition. But this substitution lowers the level of the CTAD placeholder for span{T()} from 2 to 1, and so the later instantiation of ConstSpanType erroneously substitutes this CTAD placeholder with the template
Re: [PATCH] c++: non-dependent immediate member fn call [PR99895]
On 1/19/22 11:15, Patrick Palka wrote: Here we're emitting a bogus error during ahead of time evaluation of a non-dependent immediate member function call such as a.f(args) because the defacto templated form for such a call is (a.f)(args) but we're trying to evaluate it using the intermediate CALL_EXPR built by build_over_call, which has the non-member form f(a, args). The defacto member form is built in build_new_method_call, so it seems we should handle the immediate call there instead. Hmm, there's already a bunch of code in build_over_call to try to fix up the object argument, and there seem to be many places other than build_new_method_call that call build_over_call for member functions; I think it's probably better to build the needed COMPONENT_REF in build_over_call. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps 11? PR c++/99895 gcc/cp/ChangeLog: * call.cc (build_over_call): Don't evaluate non-dependent immediate member function calls here. (build_new_method_call): Instead evaluate them here. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/consteval-memfn1.C: New test. * g++.dg/cpp2a/consteval-memfn2.C: New test. --- gcc/cp/call.cc| 9 - gcc/testsuite/g++.dg/cpp2a/consteval-memfn1.C | 15 gcc/testsuite/g++.dg/cpp2a/consteval-memfn2.C | 34 +++ 3 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/consteval-memfn1.C create mode 100644 gcc/testsuite/g++.dg/cpp2a/consteval-memfn2.C diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index d4a07a7a9b3..0583cc0083b 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -9241,7 +9241,10 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) addr, nargs, argarray); if (TREE_THIS_VOLATILE (fn) && cfun) current_function_returns_abnormally = 1; - if (immediate_invocation_p (fn, nargs)) + if (!DECL_FUNCTION_MEMBER_P (fn) + /* Non-dependent immediate member function calls are evaluated in +build_new_method_call. 
*/ + && immediate_invocation_p (fn, nargs)) { tree obj_arg = NULL_TREE, exprimm = expr; if (DECL_CONSTRUCTOR_P (fn)) @@ -11227,6 +11230,10 @@ skip_prune: call = convert_from_reference (call); if (cast_to_void) call = build_nop (void_type_node, call); + + if (immediate_invocation_p (fn, vec_safe_length (orig_args))) + fold_non_dependent_expr (call, complain, +/*manifestly_const_eval=*/true); } /* Free all the conversions we allocated. */ diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval-memfn1.C b/gcc/testsuite/g++.dg/cpp2a/consteval-memfn1.C new file mode 100644 index 000..d2df2e9b5ae --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/consteval-memfn1.C @@ -0,0 +1,15 @@ +// PR c++/99895 +// { dg-do compile { target c++20 } } + +struct fixed_string { + consteval int size(int n) const { +if (n < 0) throw; // { dg-error "not a constant" } +return n; + } +}; + +template +void VerifyHash(fixed_string s) { + s.size(0); // { dg-bogus "" } + s.size(-1); // { dg-message "expansion of" } +} diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval-memfn2.C b/gcc/testsuite/g++.dg/cpp2a/consteval-memfn2.C new file mode 100644 index 000..71748f46b13 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/consteval-memfn2.C @@ -0,0 +1,34 @@ +// PR c++/99895 +// { dg-do compile { target c++20 } } + +static constexpr unsigned hash(const char* s, unsigned length) +{ +s=s; +return length; +} +template +struct fixed_string +{ +constexpr fixed_string(const char ()[N]) +{ +for (int i = 0; i < N; i++) +str[i] = s[i]; +} +consteval const char* data() const { return str; } +consteval unsigned size() const { return N-1; } +char str[N]; +}; +template +static consteval void VerifyHash() +{ +( + [](auto){static_assert(hash(s.data(), s.size()) == expected_hash);}(s) +,...); +// The compiler mistakenly translates s.data() into s.data() +// and then complains that the call is not valid, because +// the function expects 0 parameters and 1 "was provided". +} +void foo() +{ +VerifyHash<5, "khaki", "plums">(); +}
Re: [PATCH v3 06/15] arm: Fix mve_vmvnq_n_ argument mode
On 13/01/2022 14:56, Christophe Lyon via Gcc-patches wrote: The vmvnq_n* intrinsics have [u]int[16|32]_t arguments, so use the V_elem iterator instead of HI in mve_vmvnq_n_. 2022-01-13 Christophe Lyon gcc/ * config/arm/mve.md (mve_vmvnq_n_): Use V_elem mode for operand 1. diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 171dd384133..5c3b34dce3a 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -617,7 +617,7 @@ (define_insn "mve_vcvtaq_" (define_insn "mve_vmvnq_n_" [ (set (match_operand:MVE_5 0 "s_register_operand" "=w") - (unspec:MVE_5 [(match_operand:HI 1 "immediate_operand" "i")] + (unspec:MVE_5 [(match_operand: 1 "immediate_operand" "i")] VMVNQ_N)) ] "TARGET_HAVE_MVE" While fixing this it might be good to fix the constraint and predicate inspired by "DL" and "neon_inv_logic_op2" respectively. This would avoid the compiler generating wrong assembly, and instead it would probably lead to the compiler using a load literal. I kind of think it would be better to have the intrinsic refuse the immediate altogether, but it seems for NEON we also use the load literal approach.
[PATCH v3] match.pd: Simplify 1 / X for integer X [PR95424]
This patch implements an optimization for the following C++ code: int f(int x) { return 1 / x; } int f(unsigned int x) { return 1 / x; } Before this patch, x86-64 gcc -std=c++20 -O3 produces the following assembly: f(int): xor edx, edx mov eax, 1 idiv edi ret f(unsigned int): xor edx, edx mov eax, 1 div edi ret In comparison, clang++ -std=c++20 -O3 produces the following assembly: f(int): lea ecx, [rdi + 1] xor eax, eax cmp ecx, 3 cmovb eax, edi ret f(unsigned int): xor eax, eax cmp edi, 1 sete al ret Clang's output is more efficient as it avoids expensive div operations. With this patch, GCC now produces the following assembly: f(int): lea eax, [rdi + 1] cmp eax, 2 mov eax, 0 cmovbe eax, edi ret f(unsigned int): xor eax, eax cmp edi, 1 sete al ret which is virtually identical to Clang's assembly output. Any slight differences in the output for f(int) is possibly related to a different missed optimization. v2: https://gcc.gnu.org/pipermail/gcc-patches/2022-January/587751.html Changes from v2: 1. Refactor from using a switch statement to using the built-in if-else statement. v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-January/587634.html Changes from v1: 1. Refactor common if conditions. 2. Use build_[minus_]one_cst (type) to get -1/1 of the correct type. 3. Match only for TRUNC_DIV_EXPR and TYPE_PRECISION (type) > 1. gcc/ChangeLog: * match.pd: Simplify 1 / X where X is an integer. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/divide-6.c: New test. * gcc.dg/tree-ssa/divide-7.c: New test. 
--- gcc/match.pd | 13 + gcc/testsuite/gcc.dg/tree-ssa/divide-6.c | 9 + gcc/testsuite/gcc.dg/tree-ssa/divide-7.c | 9 + 3 files changed, 31 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/divide-6.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/divide-7.c diff --git a/gcc/match.pd b/gcc/match.pd index 84c9b918041ee..4cd692c863d0c 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -432,6 +432,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && TYPE_UNSIGNED (type)) (trunc_div @0 @1))) + /* 1 / X -> X == 1 for unsigned integer X. +1 / X -> X >= -1 && X <= 1 ? X : 0 for signed integer X. +But not for 1 / 0 so that we can get proper warnings and errors, +and not for 1-bit integers as they are edge cases better handled elsewhere. */ +(simplify + (trunc_div integer_onep@0 @1) + (if (INTEGRAL_TYPE_P (type) && !integer_zerop (@1) && TYPE_PRECISION (type) > 1) +(if (TYPE_UNSIGNED (type)) + (eq @1 { build_one_cst (type); }) + (with { tree utype = unsigned_type_for (type); } +(cond (le (plus (convert:utype @1) { build_one_cst (utype); }) { build_int_cst (utype, 2); }) + @1 { build_zero_cst (type); }) + /* Combine two successive divisions. Note that combining ceil_div and floor_div is trickier and combining round_div even more so. */ (for div (trunc_div exact_div) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-6.c b/gcc/testsuite/gcc.dg/tree-ssa/divide-6.c new file mode 100644 index 0..a9fc4c04058c6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-6.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized" } */ + +unsigned int f(unsigned int x) { + return 1 / x; +} + +/* { dg-final { scan-tree-dump-not "1 / x_..D.;" "optimized" } } */ +/* { dg-final { scan-tree-dump "x_..D. 
== 1;" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-7.c b/gcc/testsuite/gcc.dg/tree-ssa/divide-7.c new file mode 100644 index 0..285279af7c210 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-7.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized" } */ + +int f(int x) { + return 1 / x; +} + +/* { dg-final { scan-tree-dump-not "1 / x_..D.;" "optimized" } } */ +/* { dg-final { scan-tree-dump ".. <= 2 ? x_..D. : 0;" "optimized" } } */ -- 2.17.1
Re: [PATCH] tree-optimization/103721 - Only add equivalencies that are still valid.
On 1/19/22 04:33, Richard Biener wrote: On Wed, Jan 19, 2022 at 2:37 AM Andrew MacLeod via Gcc-patches wrote: OK for trunk? OK. I don't quite understand how what you describe above works, it sounds a bit odd with respect to the idea that equivalences should be transitive but The transitive check is what prevents us from having to find and update all the equivalence sets when a name needs to be removed. We can simply create a new equivalence with that name, and all the older equivalences in the dom tree will no longer equate with it and are eliminated by the query. With the nature of on-demand, it's possible for equivalences to get created in unexpected orders, and logging all the equivalences as they are seen and leaving the final determination to query time seems to be the easiest and most accurate way to get results. I suspect we miss a few relations if we process things in a random order, but we shouldn't get anything wrong. I should note that forming equivalences from PHI nodes with backedges is not possible without being very careful since you will easily end up equating _1 and _1 from different iterations (and thus with different value). The dominator search version used by ranger won't create equivalences from back edges normally because the back edge doesn't dominate the block. The only time we could get an equivalence from a back edge would be if all the other arguments to a PHI at the top of the loop were undefined, or the same value as came in on the back edge, i.e. top_5 = PHI would create an equivalence between top_5 and val_6... but that's OK because it is just a copy then anyway. Or top_5 = PHI This will create an equivalence between top_5 and val_6 in the loop, until we reach the point where val_6 is defined, and then the equivalence will get killed. It's possible that might cause an issue in a single BB loop, if I could reproduce that... let me experiment. In which case I'll simply disable equivalences applied to PHIs if it's driven by just a back edge. 
I don't see any other way we can get an equivalence/relation from a back edge with the oracle (other than what the threader does, it has its own oracle extensions for paths). It's on my task list to document the entire oracle mechanism for both equivalences and relations in the next month or two. Andrew
Re: [PATCH v3 05/15] arm: Add support for VPR_REG in arm_class_likely_spilled_p
On 13/01/2022 14:56, Christophe Lyon via Gcc-patches wrote: VPR_REG is the only register in its class, so it should be handled by TARGET_CLASS_LIKELY_SPILLED_P, which is achieved by calling default_class_likely_spilled_p. No test fails without this patch, but it seems it should be implemented. 2022-01-13 Christophe Lyon gcc/ * config/arm/arm.c (arm_class_likely_spilled_p): Handle VPR_REG. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index c3559ca8703..64a8f2dc7de 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -29317,7 +29317,7 @@ arm_class_likely_spilled_p (reg_class_t rclass) || rclass == CC_REG) return true; - return false; + return default_class_likely_spilled_p (rclass); } /* Implements target hook small_register_classes_for_mode_p. */ LGTM, but await reviewer approval. I suspect this would help avoid spilling of other special registers, though I'm not sure we codegen any of them often enough to make a difference, which is why it is likely to have no effect on anything else.
Re: [PATCH v3 04/15] arm: Add GENERAL_AND_VPR_REGS regclass
Hi Christophe, On 13/01/2022 14:56, Christophe Lyon via Gcc-patches wrote: At some point during the development of this patch series, it appeared that in some cases the register allocator wants “VPR or general” rather than “VPR or general or FP” (which is the same thing as ALL_REGS). The series does not seem to require this anymore, but it seems to be a good thing to do anyway, to give the register allocator more freedom. Not sure I fully understand this, but I guess it creates an extra class the register allocator can use to group things that can go into VPR or general reg? CLASS_MAX_NREGS and arm_hard_regno_nregs need adjustment to avoid a regression in gcc.dg/stack-usage-1.c when compiled with -mthumb -mfloat-abi=hard -march=armv8.1-m.main+mve.fp+fp.dp. I have not looked into this failure, but ... 2022-01-13 Christophe Lyon gcc/ * config/arm/arm.h (reg_class): Add GENERAL_AND_VPR_REGS. (REG_CLASS_NAMES): Likewise. (REG_CLASS_CONTENTS): Likewise. (CLASS_MAX_NREGS): Handle VPR. * config/arm/arm.c (arm_hard_regno_nregs): Handle VPR. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index bb75921f32d..c3559ca8703 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -25287,6 +25287,9 @@ thumb2_asm_output_opcode (FILE * stream) static unsigned int arm_hard_regno_nregs (unsigned int regno, machine_mode mode) { + if (IS_VPR_REGNUM (regno)) +return CEIL (GET_MODE_SIZE (mode), 2); When do we ever want to use more than 1 register for VPR? @@ -1453,7 +1456,9 @@ extern const char *fp_sysreg_names[NB_FP_SYSREGS]; ARM regs are UNITS_PER_WORD bits. FIXME: Is this true for iWMMX? */ #define CLASS_MAX_NREGS(CLASS, MODE) \ - (ARM_NUM_REGS (MODE)) + (CLASS == VPR_REG) \ + ? CEIL (GET_MODE_SIZE (MODE), 2)\ + : (ARM_NUM_REGS (MODE)) Same.
[PATCH v2] rs6000: More factoring of overload processing
Hi! [I'm resubmitting this because the filename changed with the recent conversion from .c to .cc.] This patch continues the refactoring started with r12-6014. I had previously noted that the resolve_vec* routines can be further simplified by processing the argument list earlier, so that all routines can use the arrays of arguments and types. I found that this was useful for some of the routines, but not for all of them. For several of the special-cased overloads, we don't specify all of the possible type combinations in rs6000-overload.def, because the types don't matter for the expansion we do. For these, we can't use generic error message handling when the number of arguments is incorrect, because the result is misleading error messages that indicate argument types are wrong. So this patch goes halfway and improves the factoring on the remaining special cases, but leaves vec_splats, vec_promote, vec_extract, vec_insert, and vec_step alone. Bootstrapped and tested on powerpc64le-linux-gnu. Is this okay for trunk? Thanks, Bill 2022-01-18 Bill Schmidt gcc/ * config/rs6000/rs6000-c.cc (resolve_vec_mul): Accept args and types parameters instead of arglist and nargs. Simplify accordingly. Remove unnecessary test for argument count mismatch. (resolve_vec_cmpne): Likewise. (resolve_vec_adde_sube): Likewise. (resolve_vec_addec_subec): Likewise. (altivec_resolve_overloaded_builtin): Move overload special handling after the gathering of arguments into args[] and types[] and the test for correct number of arguments. Don't perform the test for correct number of arguments for certain special cases. Call the other special cases with args and types instead of arglist and nargs. 
--- gcc/config/rs6000/rs6000-c.cc | 304 ++ 1 file changed, 127 insertions(+), 177 deletions(-) diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index 145421ab8f2..35c1383f059 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -939,37 +939,25 @@ altivec_build_resolved_builtin (tree *args, int n, tree fntype, tree ret_type, enum resolution { unresolved, resolved, resolved_bad }; /* Resolve an overloaded vec_mul call and return a tree expression for the - resolved call if successful. NARGS is the number of arguments to the call. - ARGLIST contains the arguments. RES must be set to indicate the status of + resolved call if successful. ARGS contains the arguments to the call. + TYPES contains their types. RES must be set to indicate the status of the resolution attempt. LOC contains statement location information. */ static tree -resolve_vec_mul (resolution *res, vec *arglist, unsigned nargs, -location_t loc) +resolve_vec_mul (resolution *res, tree *args, tree *types, location_t loc) { /* vec_mul needs to be special cased because there are no instructions for it for the {un}signed char, {un}signed short, and {un}signed int types. */ - if (nargs != 2) -{ - error ("builtin %qs only accepts 2 arguments", "vec_mul"); - *res = resolved; - return error_mark_node; -} - - tree arg0 = (*arglist)[0]; - tree arg0_type = TREE_TYPE (arg0); - tree arg1 = (*arglist)[1]; - tree arg1_type = TREE_TYPE (arg1); /* Both arguments must be vectors and the types must be compatible. 
*/ - if (TREE_CODE (arg0_type) != VECTOR_TYPE - || !lang_hooks.types_compatible_p (arg0_type, arg1_type)) + if (TREE_CODE (types[0]) != VECTOR_TYPE + || !lang_hooks.types_compatible_p (types[0], types[1])) { *res = resolved_bad; return error_mark_node; } - switch (TYPE_MODE (TREE_TYPE (arg0_type))) + switch (TYPE_MODE (TREE_TYPE (types[0]))) { case E_QImode: case E_HImode: @@ -978,21 +966,21 @@ resolve_vec_mul (resolution *res, vec *arglist, unsigned nargs, case E_TImode: /* For scalar types just use a multiply expression. */ *res = resolved; - return fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg0), arg0, - fold_convert (TREE_TYPE (arg0), arg1)); + return fold_build2_loc (loc, MULT_EXPR, types[0], args[0], + fold_convert (types[0], args[1])); case E_SFmode: { /* For floats use the xvmulsp instruction directly. */ *res = resolved; tree call = rs6000_builtin_decls[RS6000_BIF_XVMULSP]; - return build_call_expr (call, 2, arg0, arg1); + return build_call_expr (call, 2, args[0], args[1]); } case E_DFmode: { /* For doubles use the xvmuldp instruction directly. */ *res = resolved; tree call = rs6000_builtin_decls[RS6000_BIF_XVMULDP]; - return build_call_expr (call, 2, arg0, arg1); + return build_call_expr (call, 2, args[0], args[1]); } /* Other types are errors. */
[PATCH v6 1/1] [ARM] Add support for TLS register based stack protector canary access
Add support for accessing the stack canary value via the TLS register, so that multiple threads running in the same address space can use distinct canary values. This is intended for the Linux kernel running in SMP mode, where processes entering the kernel are essentially threads running the same program concurrently: using a global variable for the canary in that context is problematic because it can never be rotated, and so the OS is forced to use the same value as long as it remains up. Using the TLS register to index the stack canary helps with this, as it allows each CPU to context switch the TLS register along with the rest of the process, permitting each process to use its own value for the stack canary. 2022-01-19 Ard Biesheuvel * config/arm/arm-opts.h (enum stack_protector_guard): New * config/arm/arm-protos.h (arm_stack_protect_tls_canary_mem): New * config/arm/arm.cc (TARGET_STACK_PROTECT_GUARD): Define (arm_option_override_internal): Handle and put in error checks for stack protector guard options. (arm_option_reconfigure_globals): Likewise (arm_stack_protect_tls_canary_mem): New (arm_stack_protect_guard): New * config/arm/arm.md (stack_protect_set): New (stack_protect_set_tls): Likewise (stack_protect_test): Likewise (stack_protect_test_tls): Likewise (reload_tp_hard): Likewise * config/arm/arm.opt (-mstack-protector-guard): New (-mstack-protector-guard-offset): New. * doc/invoke.texi: Document new options gcc/testsuite/ChangeLog: * gcc.target/arm/stack-protector-7.c: New test. * gcc.target/arm/stack-protector-8.c: New test. 
Signed-off-by: Ard Biesheuvel --- gcc/config/arm/arm-opts.h| 6 ++ gcc/config/arm/arm-protos.h | 2 + gcc/config/arm/arm.cc| 55 +++ gcc/config/arm/arm.md| 71 +++- gcc/config/arm/arm.opt | 22 ++ gcc/doc/invoke.texi | 11 +++ gcc/testsuite/gcc.target/arm/stack-protector-7.c | 12 gcc/testsuite/gcc.target/arm/stack-protector-8.c | 7 ++ 8 files changed, 184 insertions(+), 2 deletions(-) diff --git a/gcc/config/arm/arm-opts.h b/gcc/config/arm/arm-opts.h index c50d5e56a181..24d12fafdec8 100644 --- a/gcc/config/arm/arm-opts.h +++ b/gcc/config/arm/arm-opts.h @@ -69,4 +69,10 @@ enum arm_tls_type { TLS_GNU, TLS_GNU2 }; + +/* Where to get the canary for the stack protector. */ +enum stack_protector_guard { + SSP_TLSREG, /* per-thread canary in TLS register */ + SSP_GLOBAL /* global canary */ +}; #endif diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index cd55a9f6ca54..881c72c988bd 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -195,6 +195,8 @@ extern void arm_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); extern rtx arm_load_tp (rtx); extern bool arm_coproc_builtin_available (enum unspecv); extern bool arm_coproc_ldc_stc_legitimate_address (rtx); +extern rtx arm_stack_protect_tls_canary_mem (bool); + #if defined TREE_CODE extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 7825e364c01e..c192894ff33e 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -829,6 +829,9 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_MD_ASM_ADJUST #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust + +#undef TARGET_STACK_PROTECT_GUARD +#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard /* Obstack for minipool constant handling. 
*/ static struct obstack minipool_obstack; @@ -3176,6 +3179,26 @@ arm_option_override_internal (struct gcc_options *opts, if (TARGET_THUMB2_P (opts->x_target_flags)) opts->x_inline_asm_unified = true; + if (arm_stack_protector_guard == SSP_GLOBAL + && opts->x_arm_stack_protector_guard_offset_str) +{ + error ("incompatible options %'-mstack-protector-guard=global%' and" +"%'-mstack-protector-guard-offset=%qs%'", +arm_stack_protector_guard_offset_str); +} + + if (opts->x_arm_stack_protector_guard_offset_str) +{ + char *end; + const char *str = arm_stack_protector_guard_offset_str; + errno = 0; + long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0); + if (!*str || *end || errno) + error ("%qs is not a valid offset in %qs", str, + "-mstack-protector-guard-offset="); + arm_stack_protector_guard_offset = offs; +} + #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS SUBTARGET_OVERRIDE_INTERNAL_OPTIONS; #endif @@ -3843,6 +3866,9 @@ arm_option_reconfigure_globals (void) else target_thread_pointer = TP_SOFT; } + + if (!TARGET_HARD_TP &&
[PATCH v6 0/1] implement TLS register based stack canary for ARM
Bugzilla: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102352 In the Linux kernel, user processes calling into the kernel are essentially threads running in the same address space, of a program that never terminates. This means that using a global variable for the stack protector canary value is problematic on SMP systems, as we can never change it unless we reboot the system. (Processes that sleep for any reason will do so on a call into the kernel, which means that there will always be live kernel stack frames carrying copies of the canary taken when the function was entered) AArch64 implements -mstack-protector-guard=sysreg for this purpose, as this permits the kernel to use different memory addresses for the stack canary for each CPU, and context switch the chosen system register with the rest of the process, allowing each process to use its own unique value for the stack canary. This patch implements something similar, but for the 32-bit ARM kernel, which will start using the user space TLS register TPIDRURO to index per-process metadata while running in the kernel. This means we can just add an offset to TPIDRURO to obtain the address from which to load the canary value. 
Changes since v5: - rebase onto latest changes, including .c -> .cc rename - ensure that tests execute only on targets that can support them Changes since v4: - add a couple of test cases - incorporate feedback received from Qing and Kyrylo Changes since v3: - force a reload of the TLS register before performing the stack protector check, so that we never rely on the stack for the address of the canary Changes since v2: - fix the template for stack_protect_test_tls so it correctly conveys the fact that it sets the Z flag Cc: Keith Packard Cc: thomas.preudho...@celest.fr Cc: adhemerval.zane...@linaro.org Cc: Qing Zhao Cc: Richard Sandiford Cc: Kyrylo Tkachov Cc: Richard Earnshaw Cc: gcc-patches@gcc.gnu.org Ard Biesheuvel (1): [ARM] Add support for TLS register based stack protector canary access gcc/config/arm/arm-opts.h| 6 ++ gcc/config/arm/arm-protos.h | 2 + gcc/config/arm/arm.cc| 55 +++ gcc/config/arm/arm.md| 71 +++- gcc/config/arm/arm.opt | 22 ++ gcc/doc/invoke.texi | 11 +++ gcc/testsuite/gcc.target/arm/stack-protector-7.c | 12 gcc/testsuite/gcc.target/arm/stack-protector-8.c | 7 ++ 8 files changed, 184 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/stack-protector-7.c create mode 100644 gcc/testsuite/gcc.target/arm/stack-protector-8.c -- 2.30.2
[committed] libstdc++: Include for size_t and ptrdiff_t [PR104123]
Tested x86_64-linux, pushed to trunk. libstdc++-v3/ChangeLog: PR libstdc++/104123 * testsuite/29_atomics/headers/stdatomic.h/c_compat.cc: Include . --- .../testsuite/29_atomics/headers/stdatomic.h/c_compat.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/c_compat.cc b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/c_compat.cc index 98c1102a974..80d2e150647 100644 --- a/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/c_compat.cc +++ b/libstdc++-v3/testsuite/29_atomics/headers/stdatomic.h/c_compat.cc @@ -92,12 +92,13 @@ static_assert(is_same); #endif static_assert(is_same); static_assert(is_same); -static_assert(is_same); -static_assert(is_same); #ifdef _GLIBCXX_USE_C99_STDINT_TR1 static_assert(is_same); static_assert(is_same); #endif +#include +static_assert(is_same); +static_assert(is_same); static_assert( requires (::atomic_int* i, int* e) { ::atomic_is_lock_free(i); -- 2.31.1
[PATCH] rs6000: Fix LE code gen for vec_cnt[lt]z_lsbb [PR95082]
Hi! https://gcc.gnu.org/PR95082 demonstrates that we don't generate correct code for vec_cntlz_lsbb and vec_cnttz_lsbb for little-endian targets. This patch corrects the problem by marking the built-ins as bif_is_endian and using the correct target patterns for each endianness. Note that the default patterns are for little endian, and the overridden patterns in rs6000-builtin.cc are for big endian. Bootstrapped and tested on powerpc64le-linux-gnu with no regressions. Is this okay for trunk, and eventually for backport to GCC 11? Thanks! Bill 2022-01-18 Bill Schmidt gcc/ PR target/95082 * config/rs6000/rs6000-builtin.cc (rs6000_expand_builtin): Handle endianness for vclzlsbb and vctzlsbb. * config/rs6000/rs6000-builtins.def (VCLZLSBB_V16QI): Change default pattern and indicate a different pattern will be used for big endian. (VCLZLSBB_V4SI): Likewise. (VCLZLSBB_V8HI): Likewise. (VCTZLSBB_V16QI): Likewise. (VCTZLSBB_V4SI): Likewise. (VCTZLSBB_V8HI): Likewise. gcc/testsuite/ PR target/95082 * gcc.target/powerpc/vsu/vec-cntlz-lsbb-0.c: Restrict to -mbig. * gcc.target/powerpc/vsu/vec-cntlz-lsbb-1.c: Likewise. * gcc.target/powerpc/vsu/vec-cntlz-lsbb-3.c: New. * gcc.target/powerpc/vsu/vec-cntlz-lsbb-4.c: New. * gcc.target/powerpc/vsu/vec-cnttz-lsbb-0.c: Restrict to -mbig. * gcc.target/powerpc/vsu/vec-cnttz-lsbb-1.c: Likewise. * gcc.target/powerpc/vsu/vec-cnttz-lsbb-3.c: New. * gcc.target/powerpc/vsu/vec-cnttz-lsbb-4.c: New. 
--- gcc/config/rs6000/rs6000-builtin.cc | 12 gcc/config/rs6000/rs6000-builtins.def | 12 ++-- .../gcc.target/powerpc/vsu/vec-cntlz-lsbb-0.c | 2 +- .../gcc.target/powerpc/vsu/vec-cntlz-lsbb-1.c | 2 +- .../gcc.target/powerpc/vsu/vec-cntlz-lsbb-3.c | 15 +++ .../gcc.target/powerpc/vsu/vec-cntlz-lsbb-4.c | 15 +++ .../gcc.target/powerpc/vsu/vec-cnttz-lsbb-0.c | 2 +- .../gcc.target/powerpc/vsu/vec-cnttz-lsbb-1.c | 2 +- .../gcc.target/powerpc/vsu/vec-cnttz-lsbb-3.c | 15 +++ .../gcc.target/powerpc/vsu/vec-cnttz-lsbb-4.c | 15 +++ 10 files changed, 82 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/vsu/vec-cntlz-lsbb-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsu/vec-cntlz-lsbb-4.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsu/vec-cnttz-lsbb-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/vsu/vec-cnttz-lsbb-4.c diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index 6eca3568c02..421277a0ef0 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -3485,6 +3485,18 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */, icode = CODE_FOR_vsx_store_v8hi; else if (fcode == RS6000_BIF_ST_ELEMREV_V16QI) icode = CODE_FOR_vsx_store_v16qi; + else if (fcode == RS6000_BIF_VCLZLSBB_V16QI) + icode = CODE_FOR_vclzlsbb_v16qi; + else if (fcode == RS6000_BIF_VCLZLSBB_V4SI) + icode = CODE_FOR_vclzlsbb_v4si; + else if (fcode == RS6000_BIF_VCLZLSBB_V8HI) + icode = CODE_FOR_vclzlsbb_v8hi; + else if (fcode == RS6000_BIF_VCTZLSBB_V16QI) + icode = CODE_FOR_vctzlsbb_v16qi; + else if (fcode == RS6000_BIF_VCTZLSBB_V4SI) + icode = CODE_FOR_vctzlsbb_v4si; + else if (fcode == RS6000_BIF_VCTZLSBB_V8HI) + icode = CODE_FOR_vctzlsbb_v8hi; else gcc_unreachable (); } diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index cfe31c2e7de..2bb997a5279 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ 
b/gcc/config/rs6000/rs6000-builtins.def @@ -2551,13 +2551,13 @@ VBPERMD altivec_vbpermd {} const signed int __builtin_altivec_vclzlsbb_v16qi (vsc); -VCLZLSBB_V16QI vclzlsbb_v16qi {} +VCLZLSBB_V16QI vctzlsbb_v16qi {endian} const signed int __builtin_altivec_vclzlsbb_v4si (vsi); -VCLZLSBB_V4SI vclzlsbb_v4si {} +VCLZLSBB_V4SI vctzlsbb_v4si {endian} const signed int __builtin_altivec_vclzlsbb_v8hi (vss); -VCLZLSBB_V8HI vclzlsbb_v8hi {} +VCLZLSBB_V8HI vctzlsbb_v8hi {endian} const vsc __builtin_altivec_vctzb (vsc); VCTZB ctzv16qi2 {} @@ -2572,13 +2572,13 @@ VCTZW ctzv4si2 {} const signed int __builtin_altivec_vctzlsbb_v16qi (vsc); -VCTZLSBB_V16QI vctzlsbb_v16qi {} +VCTZLSBB_V16QI vclzlsbb_v16qi {endian} const signed int __builtin_altivec_vctzlsbb_v4si (vsi); -VCTZLSBB_V4SI vctzlsbb_v4si {} +VCTZLSBB_V4SI vclzlsbb_v4si {endian} const signed int __builtin_altivec_vctzlsbb_v8hi (vss); -VCTZLSBB_V8HI vctzlsbb_v8hi {} +VCTZLSBB_V8HI vclzlsbb_v8hi {endian} const signed int __builtin_altivec_vcmpaeb_p (vsc, vsc); VCMPAEB_P
Re: [PATCH] waccess: Look at calls when tracking clobbers [PR104092]
On 1/19/22 09:22, Richard Sandiford wrote: Martin Sebor writes: On 1/19/22 03:09, Richard Sandiford wrote: Richard Biener writes: On Tue, Jan 18, 2022 at 2:40 PM Richard Sandiford via Gcc-patches wrote: In this PR the waccess pass was fed: D.10779 ={v} {CLOBBER}; VIEW_CONVERT_EXPR(D.10779) = .MASK_LOAD_LANES (addr_5(D), 64B, _2); _7 = D.10779.__val[0]; However, the tracking of m_clobbers only looked at gassigns, so it missed that the clobber on the first line was overwritten by the call on the second line. Just as a note another possible def can come via asm() outputs and clobbers. There would have been walk_stmt_load_store_ops to track all those down (not sure if the function is a good fit here). Hmm. Looking at what the pass is doing in more detail, I'm not sure this approach to handling m_clobbers is safe. The pass walks the blocks in sequence (rather than using a dom walk, say): FOR_EACH_BB_FN (bb, fun) check_block (bb); so it could see the clobber after a later dominating assignment. Similarly check_call_dangling could see a use that is “protected” by a later assignment. check_call_dangling() reports only uses that are dominated by prior clobbers (determined in use_after_inval_p) so it should not have this problem. Yeah, but what I mean is that, if we have: A dominates B dominates C A clobbers X B defines X C uses X we could still see them in this order: A, C, B The dominance check would then succeed for C even though B should invalidate the clobber. I see. I think you're right, that case of "clobber revival" isn't handled. I don't know how to trigger it or have a sense of how often it might come up (the dangling check runs only very early, before loop unrolling, to try to avoid it as much as possible). But running the first loop in dominator order instead as you suggest should be easy enough. Do you happen to have an idea for a test case to trigger the problem and verify it's fixed? Martin Thanks, Richard
Re: [PATCH] Make `-Werror` optional in libatomic/libbacktrace/libgomp/libitm/libsanitizer
On Mon, Jan 17, 2022 at 3:05 PM David Seifert via Gcc-patches wrote: > > * `-Werror` can cause issues when a more recent version of GCC compiles > an older version: > - https://bugs.gentoo.org/229059 > - https://bugs.gentoo.org/475350 > - https://bugs.gentoo.org/667104 > --- > libatomic/configure.ac| 6 -- > libbacktrace/configure.ac | 7 --- > libgomp/configure.ac | 6 -- > libitm/configure.ac | 6 -- > libsanitizer/configure.ac | 9 + > libsanitizer/libbacktrace/Makefile.am | 2 -- > 6 files changed, 25 insertions(+), 11 deletions(-) Hi, thanks for the patch, I personally support it (assuming it goes along with a regeneration of the configure scripts). Reading it, though, reminded me of another quibble I had about warning options used when building GCC: The GCC docs for -Wextra say "(This option used to be called -W. The older name is still supported, but the newer name is more descriptive.)" See: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#Warning-Options This to me seems to imply that -Wextra is the preferred name, and that the old name of -W should be replaced by it. Could we also make a change to rename all instances of -W in configure scripts to -Wextra too? Just a thought. Thanks, Eric > > diff --git a/libatomic/configure.ac b/libatomic/configure.ac > index f350b9b3509..5a9f69bb74d 100644 > --- a/libatomic/configure.ac > +++ b/libatomic/configure.ac > @@ -251,10 +251,12 @@ LIBAT_ENABLE_SYMVERS > CFLAGS="$save_CFLAGS" > AC_CACHE_SAVE > > +AC_ARG_ENABLE([werror], [ > + AS_HELP_STRING([--enable-werror], [turns on -Werror @<:@default=yes@:>@])]) > # Add -Wall -Werror if we are using GCC. 
> -if test "x$GCC" = "xyes"; then > +AS_IF([test "x$enable_werror" != "xno" && test "x$GCC" = "xyes"], [ >XCFLAGS="$XCFLAGS -Wall -Werror" > -fi > +]) > > # Add CET specific flags if CET is enabled > GCC_CET_FLAGS(CET_FLAGS) > diff --git a/libbacktrace/configure.ac b/libbacktrace/configure.ac > index 0dfd82bc03e..a2858e6ecde 100644 > --- a/libbacktrace/configure.ac > +++ b/libbacktrace/configure.ac > @@ -145,10 +145,11 @@ ACX_PROG_CC_WARNING_OPTS([-W -Wall -Wwrite-strings > -Wstrict-prototypes \ > -Wmissing-format-attribute -Wcast-qual], > [WARN_FLAGS]) > > -if test -n "${with_target_subdir}"; then > +AC_ARG_ENABLE([werror], [ > + AS_HELP_STRING([--enable-werror], [turns on -Werror @<:@default=yes@:>@])]) > +AS_IF([test "x$enable_werror" != "xno" && test -n "${with_target_subdir}"], [ >WARN_FLAGS="$WARN_FLAGS -Werror" > -fi > - > +]) > AC_SUBST(WARN_FLAGS) > > if test -n "${with_target_subdir}"; then > diff --git a/libgomp/configure.ac b/libgomp/configure.ac > index bfb613b91f0..c3062dc5a07 100644 > --- a/libgomp/configure.ac > +++ b/libgomp/configure.ac > @@ -121,10 +121,12 @@ AC_SUBST(CFLAGS) > # in both places for now and restore CFLAGS at the end of config. > save_CFLAGS="$CFLAGS" > > +AC_ARG_ENABLE([werror], [ > + AS_HELP_STRING([--enable-werror], [turns on -Werror @<:@default=yes@:>@])]) > # Add -Wall -Werror if we are using GCC. > -if test "x$GCC" = "xyes"; then > +AS_IF([test "x$enable_werror" != "xno" && test "x$GCC" = "xyes"], [ >XCFLAGS="$XCFLAGS -Wall -Werror" > -fi > +]) > > # Find other programs we need. > AC_CHECK_TOOL(AR, ar) > diff --git a/libitm/configure.ac b/libitm/configure.ac > index ac81b146845..ad99d14098e 100644 > --- a/libitm/configure.ac > +++ b/libitm/configure.ac > @@ -261,10 +261,12 @@ GCC_CHECK_ELF_STYLE_WEAKREF > CFLAGS="$save_CFLAGS" > AC_CACHE_SAVE > > +AC_ARG_ENABLE([werror], [ > + AS_HELP_STRING([--enable-werror], [turns on -Werror @<:@default=yes@:>@])]) > # Add -Wall -Werror if we are using GCC. 
> -if test "x$GCC" = "xyes"; then > +AS_IF([test "x$enable_werror" != "xno" && test "x$GCC" = "xyes"], [ >XCFLAGS="$XCFLAGS -Wall -Werror" > -fi > +]) > > XCFLAGS="$XCFLAGS $XPCFLAGS" > > diff --git a/libsanitizer/configure.ac b/libsanitizer/configure.ac > index 13cd302030d..64eb42afc83 100644 > --- a/libsanitizer/configure.ac > +++ b/libsanitizer/configure.ac > @@ -400,6 +400,15 @@ fi > AC_SUBST([TSAN_TARGET_DEPENDENT_OBJECTS]) > AC_SUBST([SANITIZER_COMMON_TARGET_DEPENDENT_OBJECTS]) > > +AC_ARG_ENABLE([werror], [ > + AS_HELP_STRING([--enable-werror], [turns on -Werror @<:@default=yes@:>@])]) > + > +WARN_FLAGS="-W -Wall -Wwrite-strings -Wmissing-format-attribute -Wcast-qual" > +AS_IF([test "x$enable_werror" != "xno"], [ > + WARN_FLAGS="$WARN_FLAGS -Werror" > +]) > +AC_SUBST([WARN_FLAGS]) > + > # Determine what GCC version number to use in filesystem paths. > GCC_BASE_VER > > diff --git a/libsanitizer/libbacktrace/Makefile.am > b/libsanitizer/libbacktrace/Makefile.am > index 16accd468df..0cf8d2104c0 100644 > --- a/libsanitizer/libbacktrace/Makefile.am > +++ b/libsanitizer/libbacktrace/Makefile.am > @@ -34,8 +34,6 @@ ACLOCAL_AMFLAGS = -I ../.. -I ../../config > AM_CPPFLAGS =
Re: [PATCH v5 1/1] [ARM] Add support for TLS register based stack protector canary access
On Wed, 19 Jan 2022 at 17:54, Kyrylo Tkachov wrote: > > Hi Ard, > > > -Original Message- > > From: Gcc-patches > bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Ard > > Biesheuvel via Gcc-patches > > Sent: Monday, November 15, 2021 6:04 PM > > To: linux-harden...@vger.kernel.org > > Cc: Richard Sandiford ; > > thomas.preudho...@celest.fr; Keith Packard ; > > gcc-patches@gcc.gnu.org; Kyrylo Tkachov ; Ard > > Biesheuvel > > Subject: [PATCH v5 1/1] [ARM] Add support for TLS register based stack > > protector canary access > > > > Add support for accessing the stack canary value via the TLS register, > > so that multiple threads running in the same address space can use > > distinct canary values. This is intended for the Linux kernel running in > > SMP mode, where processes entering the kernel are essentially threads > > running the same program concurrently: using a global variable for the > > canary in that context is problematic because it can never be rotated, > > and so the OS is forced to use the same value as long as it remains up. > > > > Using the TLS register to index the stack canary helps with this, as it > > allows each CPU to context switch the TLS register along with the rest > > of the process, permitting each process to use its own value for the > > stack canary. > > I've tested this patch on an arm-none-linux-gnueabihf target and the results > look clean. > Have you tested this patch with a kernel build as well? (since the > functionality is intended for that use). Of course. > If so, the patch is okay but please rebase it and repost so that we can > commit it taking into account > Will do.
Re: [RFC] Port git gcc-descr to Python
On Wed, Jan 19, 2022 at 8:18 AM Martin Liška wrote: > > On 1/19/22 13:49, Martin Jambor wrote: > > Hi, > > > > On Wed, Jan 19 2022, Martin Liška wrote: > >> On 10/18/21 11:01, Martin Liška wrote: > >>> On 10/12/21 10:59, Martin Liška wrote: > Hello. > > There's a complete patch that implements both git gcc-descr and > gcc-undesrc > and sets corresponding git aliases to use them. > > Ready to be installed? > Thanks, > Martin > >>> > >>> All right, so Jakub told me at IRC that we doesn't support porting to > >>> Python. > >>> However, he promised supporting the changes I made in the original shell > >>> script. > >> > >> @Jakub: May I remind this, please? > > > > putting the following line in ..git/config seems to do the trick? > > > > gcc-descr = "!f() { if test ${1:-no} = --short; then c=${2:-master}; > > r=$(git describe --all --match 'basepoints/gcc-[0-9]*' $c | sed -n > > 's,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)-\\([0-9]\\+\\)-g[0-9a-f]*$,r\\2-\\3,p;s,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)$,r\\2-0,p'); > > elif test ${1:-no} = --full; then c=${2:-master}; r=$(git describe --all > > --abbrev=40 --match 'basepoints/gcc-[0-9]*' $c | sed -n > > 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); else c=${1:-master}; r=$(git > > describe --all --abbrev=14 --match 'basepoints/gcc-[0-9]*' $c | sed -n > > 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); expr match ${r:-no} '^r[0-9]\\+$' > > >/dev/null && r=${r}-0-g$(git rev-parse ${2:-master}); fi; if test -n $r; > > then o=$(git config --get gcc-config.upstream); rr=$(echo $r | sed -n > > 's,^r\\([0-9]\\+\\)-[0-9]\\+\\(-g[0-9a-f]\\+\\)\\?$,\\1,p'); if git > > rev-parse --verify --quiet ${o:-origin}/releases/gcc-$rr >/dev/null; then > > m=releases/gcc-$rr; else m=master; fi; git merge-base --is-ancestor $c > > ${o:-origin}/$m && \\echo ${r}; fi; }; f" > > > > Derived from the following before squashing all the new lines: > > > > if test ${1:-no} = --short; then > > c=${2:-master}; > > r=$(git describe --all --match 
'basepoints/gcc-[0-9]*' $c | sed -n > > 's,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)-\\([0-9]\\+\\)-g[0-9a-f]*$,r\\2-\\3,p;s,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)$,r\\2-0,p'); > > elif test ${1:-no} = --long; then > > c=${2:-master}; > > r=$(git describe --all --abbrev=40 --match 'basepoints/gcc-[0-9]*' $c > > | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); > > else > > c=${1:-master}; > > r=$(git describe --all --abbrev=14 --match 'basepoints/gcc-[0-9]*' $c > > | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); > > expr match ${r:-no} '^r[0-9]\\+$' >/dev/null && r=${r}-0-g$(git > > rev-parse ${2:-master}); > > fi; > > if test -n $r; then > > o=$(git config --get gcc-config.upstream); > > rr=$(echo $r | sed -n > > 's,^r\\([0-9]\\+\\)-[0-9]\\+\\(-g[0-9a-f]\\+\\)\\?$,\\1,p'); > > if git rev-parse --verify --quiet ${o:-origin}/releases/gcc-$rr > > >/dev/null; then > > m=releases/gcc-$rr; > > else > > m=master; > > fi; > > git merge-base --is-ancestor $c ${o:-origin}/$m && \\echo ${r}; > > fi; > > Anyway, can we please put the nicely formatted bash script to > contrib/git-describe.sh (contrib/git-undescribe.sh), having that squashed > to one line is just crazy. > > Martin, can you please do that? > > > > > > > (And if you wanted to ask then no, I am not touching > > contrib/gcc-git-customization.sh, not even with a long pole, it scares > > me :-) > > Sure, I can then adjust it if we end up with an outlined version where to code > sits in proper .sh files. > > Martin > > > > > Martin > > > Hi, while people are modifying contrib/gcc-git-customization.sh, could I get someone to take a look at bug 102664, too? https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102664 Thanks, Eric
RE: [PATCH v5 1/1] [ARM] Add support for TLS register based stack protector canary access
Hi Ard, > -Original Message- > From: Gcc-patches bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Ard > Biesheuvel via Gcc-patches > Sent: Monday, November 15, 2021 6:04 PM > To: linux-harden...@vger.kernel.org > Cc: Richard Sandiford ; > thomas.preudho...@celest.fr; Keith Packard ; > gcc-patches@gcc.gnu.org; Kyrylo Tkachov ; Ard > Biesheuvel > Subject: [PATCH v5 1/1] [ARM] Add support for TLS register based stack > protector canary access > > Add support for accessing the stack canary value via the TLS register, > so that multiple threads running in the same address space can use > distinct canary values. This is intended for the Linux kernel running in > SMP mode, where processes entering the kernel are essentially threads > running the same program concurrently: using a global variable for the > canary in that context is problematic because it can never be rotated, > and so the OS is forced to use the same value as long as it remains up. > > Using the TLS register to index the stack canary helps with this, as it > allows each CPU to context switch the TLS register along with the rest > of the process, permitting each process to use its own value for the > stack canary. I've tested this patch on an arm-none-linux-gnueabihf target and the results look clean. Have you tested this patch with a kernel build as well? (since the functionality is intended for that use). If so, the patch is okay but please rebase it and repost so that we can commit it taking into account > > 2021-11-15 Ard Biesheuvel > > * config/arm/arm-opts.h (enum stack_protector_guard): New > * config/arm/arm-protos.h (arm_stack_protect_tls_canary_mem): > New > * config/arm/arm.c (TARGET_STACK_PROTECT_GUARD): Define ... this file has now been renamed to arm.cc and... > (arm_option_override_internal): Handle and put in error checks > for stack protector guard options. 
> (arm_option_reconfigure_globals): Likewise > (arm_stack_protect_tls_canary_mem): New > (arm_stack_protect_guard): New > * config/arm/arm.md (stack_protect_set): New > (stack_protect_set_tls): Likewise > (stack_protect_test): Likewise > (stack_protect_test_tls): Likewise > (reload_tp_hard): Likewise > * config/arm/arm.opt (-mstack-protector-guard): New > (-mstack-protector-guard-offset): New. > * doc/invoke.texi: Document new options > > gcc/testsuite/ChangeLog: > > * gcc.target/arm/stack-protector-7.c: New test. > * gcc.target/arm/stack-protector-8.c: New test. > > Signed-off-by: Ard Biesheuvel > --- > gcc/config/arm/arm-opts.h| 6 ++ > gcc/config/arm/arm-protos.h | 2 + > gcc/config/arm/arm.c | 55 +++ > gcc/config/arm/arm.md| 71 +++- > gcc/config/arm/arm.opt | 22 ++ > gcc/doc/invoke.texi | 11 +++ > gcc/testsuite/gcc.target/arm/stack-protector-7.c | 10 +++ > gcc/testsuite/gcc.target/arm/stack-protector-8.c | 5 ++ > 8 files changed, 180 insertions(+), 2 deletions(-) > > diff --git a/gcc/config/arm/arm-opts.h b/gcc/config/arm/arm-opts.h > index 5c4b62f404f7..581ba3c4fbbb 100644 > --- a/gcc/config/arm/arm-opts.h > +++ b/gcc/config/arm/arm-opts.h > @@ -69,4 +69,10 @@ enum arm_tls_type { >TLS_GNU, >TLS_GNU2 > }; > + > +/* Where to get the canary for the stack protector. 
*/ > +enum stack_protector_guard { > + SSP_TLSREG, /* per-thread canary in TLS register */ > + SSP_GLOBAL /* global canary */ > +}; > #endif > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h > index 9b1f61394ad7..d8d605920c97 100644 > --- a/gcc/config/arm/arm-protos.h > +++ b/gcc/config/arm/arm-protos.h > @@ -195,6 +195,8 @@ extern void arm_split_atomic_op (enum rtx_code, > rtx, rtx, rtx, rtx, rtx, rtx); > extern rtx arm_load_tp (rtx); > extern bool arm_coproc_builtin_available (enum unspecv); > extern bool arm_coproc_ldc_stc_legitimate_address (rtx); > +extern rtx arm_stack_protect_tls_canary_mem (bool); > + > > #if defined TREE_CODE > extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index a5b403eb3e49..e5077348ce07 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -829,6 +829,9 @@ static const struct attribute_spec > arm_attribute_table[] = > > #undef TARGET_MD_ASM_ADJUST > #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust > + > +#undef TARGET_STACK_PROTECT_GUARD > +#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard > > > > /* Obstack for minipool constant handling. */ > static struct obstack minipool_obstack; > @@ -3176,6 +3179,26 @@ arm_option_override_internal (struct > gcc_options *opts, >if (TARGET_THUMB2_P (opts->x_target_flags)) > opts->x_inline_asm_unified =
Re: [PATCH] libstdc++: Fix for non-constexpr math_errhandling
On Wed, 19 Jan 2022 at 16:45, Jonathan Wakely wrote: > > On Wed, 19 Jan 2022 at 15:46, Matthias Kretz wrote: > > > > On Wednesday, 19 January 2022 16:21:15 CET Jonathan Wakely wrote: > > > On Wed, 19 Jan 2022, 12:45 Matthias Kretz, wrote: > > > > On Wednesday, 19 January 2022 13:07:26 CET Jonathan Wakely wrote: > > > > > So is it a macro or not? > > > > > > > > I agree the quote I used is unclear. The complete paragraph: > > > > > > > > The macros > > > > > > > > MATH_ERRNO > > > > MATH_ERREXCEPT > > > > > > > > expand to the integer constants 1 and 2, respectively; the macro > > > > > > > > math_errhandling > > > > > > > > expands to an expression that has type int and the value MATH_ERRNO, > > > > MATH_ERREXCEPT, or the bitwise OR of both. The value of math_errhandling > > > > is > > > > constant for the duration of the program. It is unspecified whether > > > > math_errhandling is a macro or an identifier with external linkage. If a > > > > macro > > > > definition is suppressed or a program defines an identifier with the > > > > name > > > > math_errhandling, the behavior is undefined. If the expression > > > > math_errhandling & MATH_ERREXCEPT can be nonzero, the implementation > > > > shall > > > > define the macros FE_DIVBYZERO, FE_INVALID, and FE_OVERFLOW in > > > > <fenv.h>. > > > > > > But that still says "the macro math_errhandling" and then says it might > > > not > > > be a macro. > > > > There's also [cmath.syn] https://eel.is/c++draft/cmath.syn which says: > > > > #define math_errhandling see below > > > > So, FWIW, libstdc++ is required to define math_errhandling as a macro in > > <cmath>. Thus, the original error (that math_errhandling wasn't defined even > > after <cmath> was included) really needs a fix in <cmath>. :-P > > No, because we get it from libc: > > #include_next <math.h> So if you aren't seeing it after <cmath> is included, your libc is broken.
Re: [PATCH] libstdc++: Fix for non-constexpr math_errhandling
On Wed, 19 Jan 2022 at 15:46, Matthias Kretz wrote: > > On Wednesday, 19 January 2022 16:21:15 CET Jonathan Wakely wrote: > > On Wed, 19 Jan 2022, 12:45 Matthias Kretz, wrote: > > > On Wednesday, 19 January 2022 13:07:26 CET Jonathan Wakely wrote: > > > > So is it a macro or not? > > > > > > I agree the quote I used is unclear. The complete paragraph: > > > > > > The macros > > > > > > MATH_ERRNO > > > MATH_ERREXCEPT > > > > > > expand to the integer constants 1 and 2, respectively; the macro > > > > > > math_errhandling > > > > > > expands to an expression that has type int and the value MATH_ERRNO, > > > MATH_ERREXCEPT, or the bitwise OR of both. The value of math_errhandling > > > is > > > constant for the duration of the program. It is unspecified whether > > > math_errhandling is a macro or an identifier with external linkage. If a > > > macro > > > definition is suppressed or a program defines an identifier with the name > > > math_errhandling, the behavior is undefined. If the expression > > > math_errhandling & MATH_ERREXCEPT can be nonzero, the implementation shall > > > define the macros FE_DIVBYZERO, FE_INVALID, and FE_OVERFLOW in > > > <fenv.h>. > > > > But that still says "the macro math_errhandling" and then says it might not > > be a macro. > > There's also [cmath.syn] https://eel.is/c++draft/cmath.syn which says: > > #define math_errhandling see below > > So, FWIW, libstdc++ is required to define math_errhandling as a macro in > <cmath>. Thus, the original error (that math_errhandling wasn't defined even > after <cmath> was included) really needs a fix in <cmath>. :-P No, because we get it from libc: #include_next <math.h>
[committed] Update per-file selftest and finalization hooks for .c to .cc renaming
On Wed, 2022-01-19 at 14:46 +0100, Richard Biener wrote: > On Wed, Jan 19, 2022 at 2:45 PM David Malcolm via Gcc-patches > wrote: > > > > This is mostly a mechanical change, apart from: > > - fix the name of opt_proposer_c to match its filename > > (opt-suggestions.cc) > > - delete a bogus "modref_c_tests" decl from ipa-modref-tree.h > > that's been > > present since the initial commit of that file > > (d119f34c952f8718fdbabc63e2f369a16e92fa07) > > > > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. > > > > OK for trunk? (arguably it's an "obvious" followup to the .c to > > .cc > > renaming, but we're in stage 4; I think it makes sense to do it > > now, > > as it seems logically connected to the renaming, and low-risk) > > OK. Thanks. I noticed that the function name in attribs.cc was also misnamed ("attribute" vs "attribs"), so I took the liberty of fixing that as well. For reference, here's what I've pushed to trunk, as d5148d4faa6fb1e93b8ea1fad4a69806454f56f5, after doing another successful bootstrap & regression test on x86_64-pc-linux-gnu. gcc/ChangeLog: * attribs.cc (attribute_c_tests): Rename to... (attribs_cc_tests): ...this. * bitmap.cc (bitmap_c_tests): Rename to... (bitmap_cc_tests): ...this. * cgraph.cc (cgraph_c_finalize): Rename to... (cgraph_cc_finalize): ...this. (cgraph_c_tests): Rename to... (cgraph_cc_tests): ...this. * cgraph.h (cgraph_c_finalize): Rename to... (cgraph_cc_finalize): ...this. (cgraphunit_c_finalize): Rename to... (cgraphunit_cc_finalize): ...this. * cgraphunit.cc (cgraphunit_c_finalize): Rename to... (cgraphunit_cc_finalize): ...this. * convert.cc (convert_c_tests): Rename to... (convert_cc_tests): ...this. * dbgcnt.cc (dbgcnt_c_tests): Rename to... (dbgcnt_cc_tests): ...this. * diagnostic-show-locus.cc (diagnostic_show_locus_c_tests): Rename to... (diagnostic_show_locus_cc_tests): ...this. * diagnostic.cc (diagnostic_c_tests): Rename to... (diagnostic_cc_tests): ...this. 
* dumpfile.cc (dumpfile_c_tests): Rename to... (dumpfile_cc_tests): ...this. * dwarf2out.cc (dwarf2out_c_finalize): Rename to... (dwarf2out_cc_finalize): ...this. * dwarf2out.h (dwarf2out_c_finalize): Rename to... (dwarf2out_cc_finalize): ...this. * edit-context.cc (edit_context_c_tests): Rename to... (edit_context_cc_tests): ...this. * et-forest.cc (et_forest_c_tests): Rename to... (et_forest_cc_tests): ...this. * fibonacci_heap.cc (fibonacci_heap_c_tests): Rename to... (fibonacci_heap_cc_tests): ...this. * fold-const.cc (fold_const_c_tests): Rename to... (fold_const_cc_tests): ...this. * function-tests.cc (function_tests_c_tests): Rename to... (function_tests_cc_tests): ...this. * gcse.cc (gcse_c_finalize): Rename to... (gcse_cc_finalize): ...this. * gcse.h (gcse_c_finalize): Rename to... (gcse_cc_finalize): ...this. * ggc-tests.cc (ggc_tests_c_tests): Rename to... (ggc_tests_cc_tests): ...this. * gimple-ssa-store-merging.cc (store_merging_c_tests): Rename to... (store_merging_cc_tests): ...this. * gimple.cc (gimple_c_tests): Rename to... (gimple_cc_tests): ...this. * hash-map-tests.cc (hash_map_tests_c_tests): Rename to... (hash_map_tests_cc_tests): ...this. * hash-set-tests.cc (hash_set_tests_c_tests): Rename to... (hash_set_tests_cc_tests): ...this. * input.cc (input_c_tests): Rename to... (input_cc_tests): ...this. * ipa-cp.cc (ipa_cp_c_finalize): Rename to... (ipa_cp_cc_finalize): ...this. * ipa-fnsummary.cc (ipa_fnsummary_c_finalize): Rename to... (ipa_fnsummary_cc_finalize): ...this. * ipa-fnsummary.h (ipa_fnsummary_c_finalize): Rename to... (ipa_fnsummary_cc_finalize): ...this. * ipa-modref-tree.cc (ipa_modref_tree_c_tests): Rename to... (ipa_modref_tree_cc_tests): ...this. * ipa-modref-tree.h (modref_c_tests): Delete bogus decl. * ipa-modref.cc (ipa_modref_c_finalize): Rename to... (ipa_modref_cc_finalize): ...this. * ipa-modref.h (ipa_modref_c_finalize): Rename to... (ipa_modref_cc_finalize): ...this. * ipa-prop.h (ipa_cp_c_finalize): Rename to... 
(ipa_cp_cc_finalize): ...this. * ipa-reference.cc (ipa_reference_c_finalize): Rename to... (ipa_reference_cc_finalize): ...this. * ipa-reference.h (ipa_reference_c_finalize): Rename to... (ipa_reference_cc_finalize): ...this. * ira-costs.cc (ira_costs_c_finalize): Rename to... (ira_costs_cc_finalize): ...this. * ira.h (ira_costs_c_finalize): Rename to... (ira_costs_cc_finalize): ...this. *
Re: [PATCH] tree-optimization/104114 - avoid diagnosing V1mode lowering
Richard Biener via Gcc-patches writes: > Currently we diagnose vector lowering of V1mode operations that > are not natively supported into V_C_E, scalar op plus CTOR with > -Wvector-operation-performance but that's hardly useful behavior > even though the way we lower things can be improved. > > The following disables the diagnostics for the cases the vect.exp > testsuite runs into, on x86 that are vect-cond-11.c and > vect-singleton_1.c. > > Bootstrap / regtest pending on x86_64-unknown-linux-gnu. > > Comments? Makes sense to me FWIW. Thanks, Richard > Thanks, > Richard. > > 2022-01-19 Richard Biener > > PR tree-optimization/104114 > * tree-vect-generic.c (expand_vector_piecewise): Do not diagnose > single element vector decomposition. > --- > gcc/tree-vect-generic.cc | 5 - > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc > index 5814a71a5bb..f4fc7d16119 100644 > --- a/gcc/tree-vect-generic.cc > +++ b/gcc/tree-vect-generic.cc > @@ -317,7 +317,10 @@ expand_vector_piecewise (gimple_stmt_iterator *gsi, > elem_op_func f, >int i; >location_t loc = gimple_location (gsi_stmt (*gsi)); > > - if (ret_type || !parallel_p) > + if (nunits == 1) > +/* Do not diagnose decomposing single element vectors. */ > +; > + else if (ret_type || !parallel_p) > warning_at (loc, OPT_Wvector_operation_performance, > "vector operation will be expanded piecewise"); >else
Re: [PATCH] waccess: Look at calls when tracking clobbers [PR104092]
Martin Sebor writes: > On 1/19/22 03:09, Richard Sandiford wrote: >> Richard Biener writes: >>> On Tue, Jan 18, 2022 at 2:40 PM Richard Sandiford via Gcc-patches >>> wrote: In this PR the waccess pass was fed: D.10779 ={v} {CLOBBER}; VIEW_CONVERT_EXPR(D.10779) = .MASK_LOAD_LANES (addr_5(D), 64B, _2); _7 = D.10779.__val[0]; However, the tracking of m_clobbers only looked at gassigns, so it missed that the clobber on the first line was overwritten by the call on the second line. >>> >>> Just as a note another possible def can come via asm() outputs >>> and clobbers. There would have been walk_stmt_load_store_ops >>> to track all those down (not sure if the function is a good fit here). >> >> Hmm. Looking at what the pass is doing in more detail, I'm not sure >> this approach to handling m_clobbers is safe. The pass walks the >> blocks in sequence (rather than using a dom walk, say): >> >>FOR_EACH_BB_FN (bb, fun) >> check_block (bb); >> >> so it could see the clobber after a later dominating assignment. >> Similarly check_call_dangling could see a use that is “protected” >> by a later assignment. > > check_call_dangling() reports only uses that are dominated by prior > clobbers (determined in use_after_inval_p) so it should not have > this problem. Yeah, but what I mean is that, if we have: A dominates B dominates C A clobbers X B defines X C uses X we could still see them in this order: A, C, B The dominance check would then succeed for C even though B should invalidate the clobber. Thanks, Richard
[PATCH] c++: non-dependent immediate member fn call [PR99895]
Here we're emitting a bogus error during ahead of time evaluation of a non-dependent immediate member function call such as a.f(args) because the de facto templated form for such a call is (a.f)(args) but we're trying to evaluate it using the intermediate CALL_EXPR built by build_over_call, which has the non-member form f(a, args). The de facto member form is built in build_new_method_call, so it seems we should handle the immediate call there instead. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps 11? PR c++/99895 gcc/cp/ChangeLog: * call.cc (build_over_call): Don't evaluate non-dependent immediate member function calls here. (build_new_method_call): Instead evaluate them here. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/consteval-memfn1.C: New test. * g++.dg/cpp2a/consteval-memfn2.C: New test. --- gcc/cp/call.cc| 9 - gcc/testsuite/g++.dg/cpp2a/consteval-memfn1.C | 15 gcc/testsuite/g++.dg/cpp2a/consteval-memfn2.C | 34 +++ 3 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/consteval-memfn1.C create mode 100644 gcc/testsuite/g++.dg/cpp2a/consteval-memfn2.C diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index d4a07a7a9b3..0583cc0083b 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -9241,7 +9241,10 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) addr, nargs, argarray); if (TREE_THIS_VOLATILE (fn) && cfun) current_function_returns_abnormally = 1; - if (immediate_invocation_p (fn, nargs)) + if (!DECL_FUNCTION_MEMBER_P (fn) + /* Non-dependent immediate member function calls are evaluated in +build_new_method_call. 
*/ + && immediate_invocation_p (fn, nargs)) { tree obj_arg = NULL_TREE, exprimm = expr; if (DECL_CONSTRUCTOR_P (fn)) @@ -11227,6 +11230,10 @@ skip_prune: call = convert_from_reference (call); if (cast_to_void) call = build_nop (void_type_node, call); + + if (immediate_invocation_p (fn, vec_safe_length (orig_args))) + fold_non_dependent_expr (call, complain, +/*manifestly_const_eval=*/true); } /* Free all the conversions we allocated. */ diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval-memfn1.C b/gcc/testsuite/g++.dg/cpp2a/consteval-memfn1.C new file mode 100644 index 000..d2df2e9b5ae --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/consteval-memfn1.C @@ -0,0 +1,15 @@ +// PR c++/99895 +// { dg-do compile { target c++20 } } + +struct fixed_string { + consteval int size(int n) const { +if (n < 0) throw; // { dg-error "not a constant" } +return n; + } +}; + +template +void VerifyHash(fixed_string s) { + s.size(0); // { dg-bogus "" } + s.size(-1); // { dg-message "expansion of" } +} diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval-memfn2.C b/gcc/testsuite/g++.dg/cpp2a/consteval-memfn2.C new file mode 100644 index 000..71748f46b13 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/consteval-memfn2.C @@ -0,0 +1,34 @@ +// PR c++/99895 +// { dg-do compile { target c++20 } } + +static constexpr unsigned hash(const char* s, unsigned length) +{ +s=s; +return length; +} +template +struct fixed_string +{ +constexpr fixed_string(const char ()[N]) +{ +for (int i = 0; i < N; i++) +str[i] = s[i]; +} +consteval const char* data() const { return str; } +consteval unsigned size() const { return N-1; } +char str[N]; +}; +template +static consteval void VerifyHash() +{ +( + [](auto){static_assert(hash(s.data(), s.size()) == expected_hash);}(s) +,...); +// The compiler mistakenly translates s.data() into s.data() +// and then complains that the call is not valid, because +// the function expects 0 parameters and 1 "was provided". 
+} +void foo() +{ +VerifyHash<5, "khaki", "plums">(); +} -- 2.35.0.rc1
Re: [PATCH] waccess: Look at calls when tracking clobbers [PR104092]
On 1/19/22 03:09, Richard Sandiford wrote: Richard Biener writes: On Tue, Jan 18, 2022 at 2:40 PM Richard Sandiford via Gcc-patches wrote: In this PR the waccess pass was fed: D.10779 ={v} {CLOBBER}; VIEW_CONVERT_EXPR(D.10779) = .MASK_LOAD_LANES (addr_5(D), 64B, _2); _7 = D.10779.__val[0]; However, the tracking of m_clobbers only looked at gassigns, so it missed that the clobber on the first line was overwritten by the call on the second line. Just as a note another possible def can come via asm() outputs and clobbers. There would have been walk_stmt_load_store_ops to track all those down (not sure if the function is a good fit here). Hmm. Looking at what the pass is doing in more detail, I'm not sure this approach to handling m_clobbers is safe. The pass walks the blocks in sequence (rather than using a dom walk, say): FOR_EACH_BB_FN (bb, fun) check_block (bb); so it could see the clobber after a later dominating assignment. Similarly check_call_dangling could see a use that is “protected” by a later assignment. check_call_dangling() reports only uses that are dominated by prior clobbers (determined in use_after_inval_p) so it should not have this problem. Martin Richard
Re: [PATCH] c++: CTAD within alias template [PR91911]
On Mon, Jan 3, 2022 at 10:24 AM Patrick Palka wrote: > > On Wed, 22 Dec 2021, Jason Merrill wrote: > > > On 12/21/21 14:08, Patrick Palka wrote: > > > On Tue, Dec 21, 2021 at 2:03 PM Patrick Palka wrote: > > > > > > > > On Wed, Jun 30, 2021 at 4:23 PM Jason Merrill wrote: > > > > > > > > > > On 6/30/21 4:18 PM, Patrick Palka wrote: > > > > > > On Wed, Jun 30, 2021 at 3:51 PM Jason Merrill > > > > > > wrote: > > > > > > > > > > > > > > On 6/30/21 11:58 AM, Patrick Palka wrote: > > > > > > > > On Wed, 30 Jun 2021, Patrick Palka wrote: > > > > > > > > > > > > > > > > > On Fri, 25 Jun 2021, Jason Merrill wrote: > > > > > > > > > > > > > > > > > > > On 6/25/21 1:11 PM, Patrick Palka wrote: > > > > > > > > > > > On Fri, 25 Jun 2021, Jason Merrill wrote: > > > > > > > > > > > > > > > > > > > > > > > On 6/24/21 4:45 PM, Patrick Palka wrote: > > > > > > > > > > > > > In the first testcase below, during parsing of the > > > > > > > > > > > > > alias > > > > > > > > > > > > > template > > > > > > > > > > > > > ConstSpanType, transparency of alias template > > > > > > > > > > > > > specializations means we > > > > > > > > > > > > > replace SpanType with SpanType's substituted > > > > > > > > > > > > > definition. But this > > > > > > > > > > > > > substitution lowers the level of the CTAD placeholder > > > > > > > > > > > > > for span(T()) from > > > > > > > > > > > > > 2 to 1, and so the later instantiantion of > > > > > > > > > > > > > ConstSpanType > > > > > > > > > > > > > erroneously substitutes this CTAD placeholder with the > > > > > > > > > > > > > template argument > > > > > > > > > > > > > at level 1 index 0, i.e. with int, before we get a > > > > > > > > > > > > > chance to perform the > > > > > > > > > > > > > CTAD. 
> > > > > > > > > > > > > > > > > > > > > > > > > > In light of this, it seems we should avoid level > > > > > > > > > > > > > lowering when > > > > > > > > > > > > > substituting through through the type-id of a > > > > > > > > > > > > > dependent > > > > > > > > > > > > > alias template > > > > > > > > > > > > > specialization. To that end this patch makes > > > > > > > > > > > > > lookup_template_class_1 > > > > > > > > > > > > > pass tf_partial to tsubst in this situation. > > > > > > > > > > > > > > > > > > > > > > > > This makes sense, but what happens if SpanType is a > > > > > > > > > > > > member > > > > > > > > > > > > template, so > > > > > > > > > > > > that > > > > > > > > > > > > the levels of it and ConstSpanType don't match? Or the > > > > > > > > > > > > other way around? > > > > > > > > > > > > > > > > > > > > > > If SpanType is a member template of say the class > > > > > > > > > > > template A (and > > > > > > > > > > > thus its level is greater than ConstSpanType): > > > > > > > > > > > > > > > > > > > > > > template > > > > > > > > > > > struct A { > > > > > > > > > > > template > > > > > > > > > > > using SpanType = decltype(span(T())); > > > > > > > > > > > }; > > > > > > > > > > > > > > > > > > > > > > template > > > > > > > > > > > using ConstSpanType = span > > > > > > > > > > A::SpanType::value_type>; > > > > > > > > > > > > > > > > > > > > > > using type = ConstSpanType; > > > > > > > > > > > > > > > > > > > > > > then this case luckily works even without the patch > > > > > > > > > > > because > > > > > > > > > > > instantiate_class_template now reuses the specialization > > > > > > > > > > > A::SpanType > > > > > > > > > > > that was formed earlier during instantiation of A, > > > > > > > > > > > where we > > > > > > > > > > > substitute only a single level of template arguments, so > > > > > > > > > > > the > > > > > > > > > > > level of > > > > > > > > > > > the CTAD placeholder inside the defining-type-id of this > > > > > > > > > 
> > specialization > > > > > > > > > > > dropped from 3 to 2, so still more than the level of > > > > > > > > > > > ConstSpanType. > > > > > > > > > > > > > > > > > > > > > > This luck is short-lived though, because if we replace > > > > > > > > > > > A::SpanType with say A::SpanType > > > > > > > > > > > then > > > > > > > > > > > the testcase > > > > > > > > > > > breaks again (without the patch) because we no longer can > > > > > > > > > > > reuse that > > > > > > > > > > > specialization, so we instead form it on the spot by > > > > > > > > > > > substituting two > > > > > > > > > > > levels of template arguments (U=int,T=T) into the > > > > > > > > > > > defining-type-id, > > > > > > > > > > > causing the level of the placeholder to drop to 1. I > > > > > > > > > > > think > > > > > > > > > > > the patch > > > > > > > > > > > causes its level to remain 3 (though I guess it should > > > > > > > > > > > really be 2). > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > For the other way around, if ConstSpanType is a member > > > > > > > > > > > template of > > > > > > > > > > > say the class template B (and thus its level is greater > > > > > > > > > >
Re: [committed] rs6000: Fix bootstrap
On Wed, Jan 19, 2022 at 12:27:32PM +0100, Jakub Jelinek wrote: > On Wed, Jan 19, 2022 at 07:54:19AM +0100, Sebastian Huber wrote: > > On 18/01/2022 22:42, Segher Boessenkool wrote: > > > > +default: > > > > + break; > > > Please don't do that. You can do > > > > > >default: > > > break; > > > break; > > > /* And just to make sure: */ > > > break; > > > break; > > > > > > and it will do exactly the same as not having a default at all. Not > > > having such useless code is by far the most readable, so please don't > > > include a default case at all. > > > > I removed the default case. I hope this is what you wanted. It was. > Unfortunately the removal of default: break; breaks bootstrap:
Re: [PATCH] libstdc++: Fix for non-constexpr math_errhandling
On Wednesday, 19 January 2022 16:21:15 CET Jonathan Wakely wrote: > On Wed, 19 Jan 2022, 12:45 Matthias Kretz, wrote: > > On Wednesday, 19 January 2022 13:07:26 CET Jonathan Wakely wrote: > > > So is it a macro or not? > > > > I agree the quote I used is unclear. The complete paragraph: > > > > The macros > > > > MATH_ERRNO > > MATH_ERREXCEPT > > > > expand to the integer constants 1 and 2, respectively; the macro > > > > math_errhandling > > > > expands to an expression that has type int and the value MATH_ERRNO, > > MATH_ERREXCEPT, or the bitwise OR of both. The value of math_errhandling > > is > > constant for the duration of the program. It is unspecified whether > > math_errhandling is a macro or an identifier with external linkage. If a > > macro > > definition is suppressed or a program defines an identifier with the name > > math_errhandling, the behavior is undefined. If the expression > > math_errhandling & MATH_ERREXCEPT can be nonzero, the implementation shall > > define the macros FE_DIVBYZERO, FE_INVALID, and FE_OVERFLOW in > > <fenv.h>. > > But that still says "the macro math_errhandling" and then says it might not > be a macro. There's also [cmath.syn] https://eel.is/c++draft/cmath.syn which says: #define math_errhandling see below So, FWIW, libstdc++ is required to define math_errhandling as a macro in <cmath>. Thus, the original error (that math_errhandling wasn't defined even after <cmath> was included) really needs a fix in <cmath>. :-P -- ── Dr. Matthias Kretz https://mattkretz.github.io GSI Helmholtz Centre for Heavy Ion Research https://gsi.de stdₓ::simd ──
Re: [PATCH] libstdc++: Fix for non-constexpr math_errhandling
On Wed, 19 Jan 2022, 12:45 Matthias Kretz, wrote: > On Wednesday, 19 January 2022 13:07:26 CET Jonathan Wakely wrote: > > On Wed, 19 Jan 2022 at 08:10, Matthias Kretz wrote: > > > Follow-up to my last patch. This one is a more thorough fix. Tested on > > > x86_64- > > > linux. OK for trunk? > > > > > > 8< > > > > > > Use SFINAE magic to support: "It is unspecified whether > math_errhandling > > > is a macro or an identifier with external linkage." [C Standard] > > > > The patch is OK for trunk, but I don't understand what the C standard > means > > here. > > > > "the macro math_errhandling expands to [...]. It is unspecified whether > > math_errhandling is a macro or an identifier with external linkage." > > > > So is it a macro or not? > > I agree the quote I used is unclear. The complete paragraph: > > The macros > > MATH_ERRNO > MATH_ERREXCEPT > > expand to the integer constants 1 and 2, respectively; the macro > > math_errhandling > > expands to an expression that has type int and the value MATH_ERRNO, > MATH_ERREXCEPT, or the bitwise OR of both. The value of math_errhandling > is > constant for the duration of the program. It is unspecified whether > math_errhandling is a macro or an identifier with external linkage. If a > macro > definition is suppressed or a program defines an identifier with the name > math_errhandling, the behavior is undefined. If the expression > math_errhandling & MATH_ERREXCEPT can be nonzero, the implementation shall > define the macros FE_DIVBYZERO, FE_INVALID, and FE_OVERFLOW in > <fenv.h>. > But that still says "the macro math_errhandling" and then says it might not be a macro. I'll ask some WG14 people for clarity, but it doesn't affect your patch.
Re: [PATCH, rs6000] Add a combine pattern for CA minus one [PR95737]
On Wed, Jan 19, 2022 at 2:12 AM HAO CHEN GUI wrote: > > Hi, >This patch adds a combine pattern for "CA minus one". As CA only has two > values (0 or 1), we could convert following pattern > (sign_extend:DI (plus:SI (reg:SI 98 ca) > (const_int -1 [0x] > to >(plus:DI (reg:DI 98 ca) > (const_int -1 [0x]))) > With this patch, it eliminates one unnecessary sign extend. Also in > rs6000, > regclass of CA register is set to NO_REGS. So CA is not in hard register set > and it can't match register_operand. The patch changes it to any_operand. Segher changed the class in 2014. https://gcc.gnu.org/pipermail/gcc-patches/2014-September/399192.html We need to ensure that it still is the correct decision in light of these new patterns. Thanks, David > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > ChangeLog > 2022-01-19 Haochen Gui > > gcc/ > * config/rs6000/predicates.md (ca_operand): Match any_operand as CA > register is not in hard register set. > * config/rs6000/rs6000.md (extenddi_ca_minus_one): Define. > > gcc/testsuite/ > * gcc.target/powerpc/pr95737.c: New. > > > patch.diff > diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md > index c65dfb91f3d..cd2ae1dc8e0 100644 > --- a/gcc/config/rs6000/predicates.md > +++ b/gcc/config/rs6000/predicates.md > @@ -188,7 +188,7 @@ (define_predicate "vlogical_operand" > > ;; Return 1 if op is the carry register. 
> (define_predicate "ca_operand" > - (match_operand 0 "register_operand") > + (match_operand 0 "any_operand") > { >if (SUBREG_P (op)) > op = SUBREG_REG (op); > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 6ecb0bd6142..f1b09aad3b5 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -2358,6 +2358,21 @@ (define_insn "subf3_carry_in_xx" >"subfe %0,%0,%0" >[(set_attr "type" "add")]) > > +(define_insn_and_split "*extenddi_ca_minus_one" > + [(set (match_operand:DI 0 "gpc_reg_operand") > + (sign_extend:DI (plus:SI (match_operand:SI 1 "ca_operand") > +(const_int -1] > + "" > + "#" > + "" > + [(parallel [(set (match_dup 0) > + (plus:DI (match_dup 2) > + (const_int -1))) > + (clobber (match_dup 2))])] > +{ > + operands[2] = copy_rtx (operands[1]); > + PUT_MODE (operands[2], DImode); > +}) > > (define_insn "@neg2" >[(set (match_operand:GPR 0 "gpc_reg_operand" "=r") > diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c > b/gcc/testsuite/gcc.target/powerpc/pr95737.c > new file mode 100644 > index 000..94320f23423 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c > @@ -0,0 +1,10 @@ > +/* PR target/95737 */ > +/* { dg-do compile { target lp64 } } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ > +/* { dg-final { scan-assembler-not {\mextsw\M} } } */ > + > + > +unsigned long long negativeLessThan (unsigned long long a, unsigned long > long b) > +{ > + return -(a < b); > +}
[committed] libstdc++: Fix libbacktrace build files
Tested x86_64-linux, pushed to trunk. This makes it possible to combine --enable-libstdcxx-debug with --enable-libstdcxx-backtrace, by adding a rule to src/Makefile to copy the backtrace-supported.h header into the src/debug/libbacktrace directory. Add libbacktrace path to testsuite flags so the tests can link without having the library installed. Also fix some warnings when running automake for the libbacktrace makefile. Use a per-library CPPFLAGS variable to fix: src/libbacktrace/Makefile.am:38: warning: AM_CPPFLAGS multiply defined in condition TRUE ... fragment.am:43: ... 'AM_CPPFLAGS' previously defined here src/libbacktrace/Makefile.am:32: 'fragment.am' included from here Create symlinks to the libbacktrace sources to fix: src/libbacktrace/Makefile.am:55: warning: source file '../../../libbacktrace/atomic.c' is in a subdirectory, src/libbacktrace/Makefile.am:55: but option 'subdir-objects' is disabled libstdc++-v3/ChangeLog: * scripts/testsuite_flags.in: Add src/libbacktrace/.libs to linker search paths. * src/Makefile.am: Fix src/debug/libbacktrace build. * src/Makefile.in: Regenerate. * src/libbacktrace/Makefile.am: Use per-library CPPFLAGS variable. Use symlinks for the source files. * src/libbacktrace/Makefile.in: Regenerate. 
--- libstdc++-v3/scripts/testsuite_flags.in | 3 +- libstdc++-v3/src/Makefile.am | 12 +- libstdc++-v3/src/Makefile.in | 9 +- libstdc++-v3/src/libbacktrace/Makefile.am | 56 ++ libstdc++-v3/src/libbacktrace/Makefile.in | 128 +- 5 files changed, 132 insertions(+), 76 deletions(-) diff --git a/libstdc++-v3/scripts/testsuite_flags.in b/libstdc++-v3/scripts/testsuite_flags.in index cf7f0f7411e..40dd3d3465e 100755 --- a/libstdc++-v3/scripts/testsuite_flags.in +++ b/libstdc++-v3/scripts/testsuite_flags.in @@ -78,7 +78,8 @@ case ${query} in ;; --cxxldflags) SECTIONLDFLAGS="@SECTION_LDFLAGS@ @LIBICONV@ - -L${BUILD_DIR}/src/filesystem/.libs" + -L${BUILD_DIR}/src/filesystem/.libs + -L${BUILD_DIR}/src/libbacktrace/.libs" echo ${SECTIONLDFLAGS} ;; *) diff --git a/libstdc++-v3/src/Makefile.am b/libstdc++-v3/src/Makefile.am index 71a0da2cd93..18f57632c3d 100644 --- a/libstdc++-v3/src/Makefile.am +++ b/libstdc++-v3/src/Makefile.am @@ -30,8 +30,10 @@ endif if ENABLE_BACKTRACE backtrace_dir = libbacktrace +backtrace_supported_h = $(backtrace_dir)/backtrace-supported.h else backtrace_dir = +backtrace_supported_h = endif ## Keep this list sync'd with acinclude.m4:GLIBCXX_CONFIGURE. 
@@ -402,7 +404,15 @@ stamp-debug: Makefile $(foreach dir,$(SUBDIRS),$(dir)/Makefile) fi; \ echo `date` > stamp-debug; -build-debug: stamp-debug +if ENABLE_BACKTRACE +${debugdir}/$(backtrace_supported_h): $(backtrace_supported_h) stamp-debug + cp $< $@ +debug_backtrace_supported_h = ${debugdir}/$(backtrace_supported_h) +else +debug_backtrace_supported_h = +endif + +build-debug: stamp-debug $(debug_backtrace_supported_h) (cd ${debugdir}; \ mv Makefile Makefile.tmp; \ sed -e 's,all-local: all-once,all-local:,' \ diff --git a/libstdc++-v3/src/libbacktrace/Makefile.am b/libstdc++-v3/src/libbacktrace/Makefile.am index 3a3195167b2..0f1143507f3 100644 --- a/libstdc++-v3/src/libbacktrace/Makefile.am +++ b/libstdc++-v3/src/libbacktrace/Makefile.am @@ -35,9 +35,12 @@ toolexeclib_LTLIBRARIES = libstdc++_libbacktrace.la ACLOCAL_AMFLAGS = -I ../.. -I ../../config -AM_CPPFLAGS = -I $(top_srcdir)/../include -I $(top_srcdir)/../libgcc \ +# This will be used instead of the common AM_CPPFLAGS from fragment.am +libstdc___libbacktrace_la_CPPFLAGS = \ + -I $(top_srcdir)/../include -I $(top_srcdir)/../libgcc \ -I ../../../libgcc -I .. 
-I $(top_srcdir) \ -I $(top_srcdir)/../libbacktrace \ + -I $(top_srcdir)/../libiberty \ -include $(top_srcdir)/src/libbacktrace/backtrace-rename.h \ $(BACKTRACE_CPPFLAGS) @@ -50,42 +53,55 @@ AM_CFLAGS += $(EXTRA_CFLAGS) AM_CXXFLAGS = $(CXX_WARN_FLAGS) -fno-rtti -fno-exceptions AM_CXXFLAGS += $(EXTRA_CXXFLAGS) +obj_prefix = std_stacktrace + +# Each FILE.c in SOURCES will be compiled to SHORTNAME-FILE.o +libstdc___libbacktrace_la_SHORTNAME = $(obj_prefix) + libstdc___libbacktrace_la_SOURCES = \ - ../../../libbacktrace/backtrace.h \ - ../../../libbacktrace/atomic.c \ - ../../../libbacktrace/dwarf.c \ - ../../../libbacktrace/fileline.c \ - ../../../libbacktrace/internal.h \ - ../../../libbacktrace/posix.c \ - ../../../libbacktrace/sort.c \ - ../../../libbacktrace/simple.c \ - ../../../libbacktrace/state.c \ - ../../../libiberty/cp-demangle.c + atomic.c \ + dwarf.c \ + fileline.c \ + posix.c \ + sort.c \ + simple.c \ + state.c \ + cp-demangle.c
Re: [PATCH, rs6000] Add a combine pattern for CA minus one [PR95737]
On Wed, Jan 19, 2022 at 2:12 AM HAO CHEN GUI wrote: > > Hi, >This patch adds a combine pattern for "CA minus one". As CA only has two > values (0 or 1), we could convert following pattern > (sign_extend:DI (plus:SI (reg:SI 98 ca) > (const_int -1 [0x] > to >(plus:DI (reg:DI 98 ca) > (const_int -1 [0x]))) > With this patch, it eliminates one unnecessary sign extend. Also in > rs6000, > regclass of CA register is set to NO_REGS. So CA is not in hard register set > and it can't match register_operand. The patch changes it to any_operand. CA_REGNO should be in class CA_REGS, not class NO_REGS. This seems like a major, latent bug. Thanks, David > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > Is this okay for trunk? Any recommendations? Thanks a lot. > > ChangeLog > 2022-01-19 Haochen Gui > > gcc/ > * config/rs6000/predicates.md (ca_operand): Match any_operand as CA > register is not in hard register set. > * config/rs6000/rs6000.md (extenddi_ca_minus_one): Define. > > gcc/testsuite/ > * gcc.target/powerpc/pr95737.c: New. > > > patch.diff > diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md > index c65dfb91f3d..cd2ae1dc8e0 100644 > --- a/gcc/config/rs6000/predicates.md > +++ b/gcc/config/rs6000/predicates.md > @@ -188,7 +188,7 @@ (define_predicate "vlogical_operand" > > ;; Return 1 if op is the carry register. 
> (define_predicate "ca_operand" > - (match_operand 0 "register_operand") > + (match_operand 0 "any_operand") > { >if (SUBREG_P (op)) > op = SUBREG_REG (op); > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 6ecb0bd6142..f1b09aad3b5 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -2358,6 +2358,21 @@ (define_insn "subf3_carry_in_xx" >"subfe %0,%0,%0" >[(set_attr "type" "add")]) > > +(define_insn_and_split "*extenddi_ca_minus_one" > + [(set (match_operand:DI 0 "gpc_reg_operand") > + (sign_extend:DI (plus:SI (match_operand:SI 1 "ca_operand") > +(const_int -1] > + "" > + "#" > + "" > + [(parallel [(set (match_dup 0) > + (plus:DI (match_dup 2) > + (const_int -1))) > + (clobber (match_dup 2))])] > +{ > + operands[2] = copy_rtx (operands[1]); > + PUT_MODE (operands[2], DImode); > +}) > > (define_insn "@neg2" >[(set (match_operand:GPR 0 "gpc_reg_operand" "=r") > diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c > b/gcc/testsuite/gcc.target/powerpc/pr95737.c > new file mode 100644 > index 000..94320f23423 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c > @@ -0,0 +1,10 @@ > +/* PR target/95737 */ > +/* { dg-do compile { target lp64 } } */ > +/* { dg-options "-O2 -mdejagnu-cpu=power8" } */ > +/* { dg-final { scan-assembler-not {\mextsw\M} } } */ > + > + > +unsigned long long negativeLessThan (unsigned long long a, unsigned long > long b) > +{ > + return -(a < b); > +}
Re: [vect] PR103997: Fix epilogue mode skipping
On 19/01/2022 11:04, Richard Biener wrote: On Tue, 18 Jan 2022, Andre Vieira (lists) wrote: On 14/01/2022 09:57, Richard Biener wrote: The 'used_vector_modes' is also a heuristic by itself since it registers every vector type we query, not only those that are used in the end ... So it's really all heuristics that can eventually go bad. IMHO remembering the VF that we ended up with (maybe w/o unrolling) for each analyzed vector_mode[] might be really the easiest thing to do, that should make it easy to skip those modes where the VF is larger or equal as the VF of the main loop for the purpose of epilogue vectorization. Likewise those vector_mode[] that failed analysis can be remembered (with -1U VF for example). Richard. I liked the caching suggestion, so here it is. Sorry for the delay, wanted to post this after pushing the vect unroll which was waiting on some retesting for the rebase. LGTM. Thanks, Richard. gcc/ChangeLog: PR 103997 * tree-vect-loop.c (vect_analyze_loop): Fix mode skipping for epilogue vectorization. Thanks! Committed the following patch. I updated the comment above the last change as I realized it still described the old behaviour. diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 0fe3529b2d1cf36617c04c1d0f1c4c7bb363607c..e15738ee6c4a1d2cf6bfa4c291a4dc46faaaf7a5 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -3004,6 +3004,12 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) unsigned int mode_i = 0; unsigned HOST_WIDE_INT simdlen = loop->simdlen; + /* Keep track of the VF for each mode. Initialize all to 0 which indicates + a mode has not been analyzed. 
*/ + auto_vec<poly_uint64, 8> cached_vf_per_mode; + for (unsigned i = 0; i < vector_modes.length (); ++i) +cached_vf_per_mode.safe_push (0); + /* First determine the main loop vectorization mode, either the first one that works, starting with auto-detecting the vector mode and then following the targets order of preference, or the one with the @@ -3011,6 +3017,10 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) while (1) { bool fatal; + unsigned int last_mode_i = mode_i; + /* Set cached VF to -1 prior to analysis, which indicates a mode has +failed. */ + cached_vf_per_mode[last_mode_i] = -1; opt_loop_vec_info loop_vinfo = vect_analyze_loop_1 (loop, shared, &loop_form_info, NULL, vector_modes, mode_i, @@ -3020,6 +3030,12 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) if (loop_vinfo) { + /* Analysis has been successful so update the VF value. The + VF should always be a multiple of unroll_factor and we want to + capture the original VF here. */ + cached_vf_per_mode[last_mode_i] + = exact_div (LOOP_VINFO_VECT_FACTOR (loop_vinfo), +loop_vinfo->suggested_unroll_factor); /* Once we hit the desired simdlen for the first time, discard any previous attempts. */ if (simdlen @@ -3100,12 +3116,10 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) { /* If the target does not support partial vectors we can shorten the number of modes to analyze for the epilogue as we know we can't pick a -mode that has at least as many NUNITS as the main loop's vectorization -factor, since that would imply the epilogue's vectorization factor -would be at least as high as the main loop's and we would be -vectorizing for more scalar iterations than there would be left. */ +mode that would lead to a VF at least as big as the +FIRST_VINFO_VF. */ if (!supports_partial_vectors - && maybe_ge (GET_MODE_NUNITS (vector_modes[mode_i]), first_vinfo_vf)) + && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf)) { mode_i++; if (mode_i == vector_modes.length ())
Re: PING 4 [PATCH v2 2/2] add -Wdangling-pointer [PR #63272]
On 17/01/2022 20:14, Martin Sebor wrote: I tried to set up OpenOffice for testing with the latest GCC but couldn't get the build to finish (it failed downloading some unavailable prerequisites). I don't remember what problem I ran into with LibreOffice; it was before I upgraded to Fedora 35 just a couple of weeks ago. Let me retry again (the build is still downloading tarballs). In the meantime, do you have any tips or suggestions getting it set up that aren't on the instructions page below? (Especially for using an alternate compiler and non-default options.) https://wiki.documentfoundation.org/Development/BuildingOnLinux#Fedora.2FRedHat Building LibreOffice from source should be relatively easy these days, esp. on Linux. Let me know if you have any specific issues. What I do to build against a GCC other than the system one is to include the two lines CC=/path/to/gcc CXX=/path/to/g++ in autogen.input. (And if you have any issues building LibreOffice, I guess you would have an even worse experience trying to build OpenOffice. I for one never looked back.)
Re: [PATCH] testsuite: Test evrp-trans.c also with unsigned types [PR104115]
OK. Thanks. On Wed, Jan 19, 2022, 14:58 Jakub Jelinek wrote: > Hi! > > The testcase from the PR got fixed with r12-3119-g675a3e40567e1d > and looks quite similar to the evrp-trans.c test, except evrp-trans.c > is tested on signed integer types. > I think it would be useful to test it for unsigned comparisons too. > > Tested on x86_64-linux (-m32/-m64), ok for trunk? > > 2022-01-19 Jakub Jelinek > > PR c/104115 > * gcc.dg/tree-ssa/evrp-trans2.c: New test. > > --- gcc/testsuite/gcc.dg/tree-ssa/evrp-trans2.c.jj 2022-01-19 > 14:54:31.195317913 +0100 > +++ gcc/testsuite/gcc.dg/tree-ssa/evrp-trans2.c 2022-01-19 > 14:54:52.964019002 +0100 > @@ -0,0 +1,8 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-evrp" } */ > + > +#define int unsigned > +#include "evrp-trans.c" > + > +/* { dg-final { scan-tree-dump-not "kill" "evrp" } } */ > +/* { dg-final { scan-tree-dump-times "keep" 13 "evrp"} } */ > > Jakub > >
[PATCH] testsuite: Test evrp-trans.c also with unsigned types [PR104115]
Hi! The testcase from the PR got fixed with r12-3119-g675a3e40567e1d and looks quite similar to the evrp-trans.c test, except evrp-trans.c is tested on signed integer types. I think it would be useful to test it for unsigned comparisons too. Tested on x86_64-linux (-m32/-m64), ok for trunk? 2022-01-19 Jakub Jelinek PR c/104115 * gcc.dg/tree-ssa/evrp-trans2.c: New test. --- gcc/testsuite/gcc.dg/tree-ssa/evrp-trans2.c.jj 2022-01-19 14:54:31.195317913 +0100 +++ gcc/testsuite/gcc.dg/tree-ssa/evrp-trans2.c 2022-01-19 14:54:52.964019002 +0100 @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-evrp" } */ + +#define int unsigned +#include "evrp-trans.c" + +/* { dg-final { scan-tree-dump-not "kill" "evrp" } } */ +/* { dg-final { scan-tree-dump-times "keep" 13 "evrp"} } */ Jakub
Re: [PATCH] match.pd, optabs: Avoid vectorization of {FLOOR,CEIL,ROUND}_{DIV,MOD}_EXPR [PR102860]
On Wed, 19 Jan 2022, Jakub Jelinek wrote: > Hi! > > power10 has modv4si3 expander and so vectorizes the following testcase > where Fortran modulo is FLOOR_MOD_EXPR. > optabs_for_tree_code indicates that the optab for all the *_MOD_EXPR > variants is umod_optab or smod_optab, but that isn't true, that optab > actually expands just TRUNC_MOD_EXPR. For the other tree codes expmed.cc > has code how to adjust the TRUNC_MOD_EXPR into those by emitting some > extra comparisons and conditional updates. Similarly for *_DIV_EXPR, > except in that case it actually needs both division and modulo. > > While it would be possible to handle it in expmed.cc for vectors as well, > we'd need to be sure all the vector operations we need for that are > available, and furthermore we wouldn't account for that in the costing. > > So, IMHO it is better to stop pretending those non-truncating (and > non-exact) div/mod operations have an optab. For GCC 13, we should > IMHO pattern match these in tree-vect-patterns.cc and transform them > to truncating div/mod with follow-up adjustments and let the vectorizer > vectorize that. As written in the PR, for signed operands: > r = x %[fl] y; > is > r = x % y; if (r && (x ^ y) < 0) r += y; > and > d = x /[fl] y; > is > r = x % y; d = x / y; if (r && (x ^ y) < 0) --d; > and > r = x %[cl] y; > is > r = x % y; if (r && (x ^ y) >= 0) r -= y; > and > d = /[cl] y; > is > r = x % y; d = x / y; if (r && (x ^ y) >= 0) ++d; > (too lazy to figure out rounding div/mod now). I'll create a PR > for that. > The patch also extends a match.pd optimization that floor_mod on > unsigned operands is actually trunc_mod. > > Bootstrapped/regtested on powerpc64le-linux, ok for trunk? OK. Thanks, Richard. > 2022-01-19 Jakub Jelinek > > PR middle-end/102860 > * match.pd (x %[fl] y -> x % y): New simplification for > unsigned integral types. > * optabs-tree.cc (optab_for_tree_code): Return unknown_optab > for {CEIL,FLOOR,ROUND}_{DIV,MOD}_EXPR with VECTOR_TYPE. 
> > * gfortran.dg/pr102860.f90: New test. > > --- gcc/match.pd.jj 2022-01-19 00:00:57.876401009 +0100 > +++ gcc/match.pd 2022-01-19 10:46:56.710675088 +0100 > @@ -425,12 +425,15 @@ (define_operator_list SYNC_FETCH_AND_AND > { build_minus_one_cst (type); }))) > > /* For unsigned integral types, FLOOR_DIV_EXPR is the same as > - TRUNC_DIV_EXPR. Rewrite into the latter in this case. */ > -(simplify > - (floor_div @0 @1) > - (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type)) > - && TYPE_UNSIGNED (type)) > - (trunc_div @0 @1))) > + TRUNC_DIV_EXPR. Rewrite into the latter in this case. Similarly > + for MOD instead of DIV. */ > +(for floor_divmod (floor_div floor_mod) > + trunc_divmod (trunc_div trunc_mod) > + (simplify > + (floor_divmod @0 @1) > + (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type)) > + && TYPE_UNSIGNED (type)) > + (trunc_divmod @0 @1 > > /* Combine two successive divisions. Note that combining ceil_div > and floor_div is trickier and combining round_div even more so. */ > --- gcc/optabs-tree.cc.jj 2022-01-18 11:58:59.740979799 +0100 > +++ gcc/optabs-tree.cc2022-01-19 10:56:13.755956061 +0100 > @@ -56,17 +56,32 @@ optab_for_tree_code (enum tree_code code > case MULT_HIGHPART_EXPR: >return TYPE_UNSIGNED (type) ? umul_highpart_optab : > smul_highpart_optab; > > -case TRUNC_MOD_EXPR: > case CEIL_MOD_EXPR: > case FLOOR_MOD_EXPR: > case ROUND_MOD_EXPR: > + /* {s,u}mod_optab implements TRUNC_MOD_EXPR. For scalar modes, > + expansion has code to adjust TRUNC_MOD_EXPR into the desired other > + modes, but for vector modes it does not. The adjustment code > + should be instead emitted in tree-vect-patterns.cc. */ > + if (TREE_CODE (type) == VECTOR_TYPE) > + return unknown_optab; > + /* FALLTHRU */ > +case TRUNC_MOD_EXPR: >return TYPE_UNSIGNED (type) ? 
umod_optab : smod_optab; > > -case RDIV_EXPR: > -case TRUNC_DIV_EXPR: > case CEIL_DIV_EXPR: > case FLOOR_DIV_EXPR: > case ROUND_DIV_EXPR: > + /* {,u}{s,u}div_optab implements {TRUNC,EXACT}_DIV_EXPR or RDIV_EXPR. > + For scalar modes, expansion has code to adjust TRUNC_DIV_EXPR > + into the desired other modes, but for vector modes it does not. > + The adjustment code should be instead emitted in > + tree-vect-patterns.cc. */ > + if (TREE_CODE (type) == VECTOR_TYPE) > + return unknown_optab; > + /* FALLTHRU */ > +case RDIV_EXPR: > +case TRUNC_DIV_EXPR: > case EXACT_DIV_EXPR: >if (TYPE_SATURATING (type)) > return TYPE_UNSIGNED (type) ? usdiv_optab : ssdiv_optab; > --- gcc/testsuite/gfortran.dg/pr102860.f90.jj 2022-01-19 10:46:56.712675060 > +0100 > +++ gcc/testsuite/gfortran.dg/pr102860.f902022-01-19 10:46:56.712675060 > +0100 > @@ -0,0 +1,10 @@ > +! PR middle-end/102860 > +! { dg-do
[PATCH] match.pd, optabs: Avoid vectorization of {FLOOR,CEIL,ROUND}_{DIV,MOD}_EXPR [PR102860]
Hi! power10 has modv4si3 expander and so vectorizes the following testcase where Fortran modulo is FLOOR_MOD_EXPR. optab_for_tree_code indicates that the optab for all the *_MOD_EXPR variants is umod_optab or smod_optab, but that isn't true, that optab actually expands just TRUNC_MOD_EXPR. For the other tree codes expmed.cc has code how to adjust the TRUNC_MOD_EXPR into those by emitting some extra comparisons and conditional updates. Similarly for *_DIV_EXPR, except in that case it actually needs both division and modulo. While it would be possible to handle it in expmed.cc for vectors as well, we'd need to be sure all the vector operations we need for that are available, and furthermore we wouldn't account for that in the costing. So, IMHO it is better to stop pretending those non-truncating (and non-exact) div/mod operations have an optab. For GCC 13, we should IMHO pattern match these in tree-vect-patterns.cc and transform them to truncating div/mod with follow-up adjustments and let the vectorizer vectorize that. As written in the PR, for signed operands: r = x %[fl] y; is r = x % y; if (r && (x ^ y) < 0) r += y; and d = x /[fl] y; is r = x % y; d = x / y; if (r && (x ^ y) < 0) --d; and r = x %[cl] y; is r = x % y; if (r && (x ^ y) >= 0) r -= y; and d = x /[cl] y; is r = x % y; d = x / y; if (r && (x ^ y) >= 0) ++d; (too lazy to figure out rounding div/mod now). I'll create a PR for that. The patch also extends a match.pd optimization that floor_mod on unsigned operands is actually trunc_mod. Bootstrapped/regtested on powerpc64le-linux, ok for trunk? 2022-01-19 Jakub Jelinek PR middle-end/102860 * match.pd (x %[fl] y -> x % y): New simplification for unsigned integral types. * optabs-tree.cc (optab_for_tree_code): Return unknown_optab for {CEIL,FLOOR,ROUND}_{DIV,MOD}_EXPR with VECTOR_TYPE. * gfortran.dg/pr102860.f90: New test. 
--- gcc/match.pd.jj 2022-01-19 00:00:57.876401009 +0100 +++ gcc/match.pd 2022-01-19 10:46:56.710675088 +0100 @@ -425,12 +425,15 @@ (define_operator_list SYNC_FETCH_AND_AND { build_minus_one_cst (type); }))) /* For unsigned integral types, FLOOR_DIV_EXPR is the same as - TRUNC_DIV_EXPR. Rewrite into the latter in this case. */ -(simplify - (floor_div @0 @1) - (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type)) - && TYPE_UNSIGNED (type)) - (trunc_div @0 @1))) + TRUNC_DIV_EXPR. Rewrite into the latter in this case. Similarly + for MOD instead of DIV. */ +(for floor_divmod (floor_div floor_mod) + trunc_divmod (trunc_div trunc_mod) + (simplify + (floor_divmod @0 @1) + (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type)) + && TYPE_UNSIGNED (type)) + (trunc_divmod @0 @1)))) /* Combine two successive divisions. Note that combining ceil_div and floor_div is trickier and combining round_div even more so. */ --- gcc/optabs-tree.cc.jj 2022-01-18 11:58:59.740979799 +0100 +++ gcc/optabs-tree.cc 2022-01-19 10:56:13.755956061 +0100 @@ -56,17 +56,32 @@ optab_for_tree_code (enum tree_code code case MULT_HIGHPART_EXPR: return TYPE_UNSIGNED (type) ? umul_highpart_optab : smul_highpart_optab; -case TRUNC_MOD_EXPR: case CEIL_MOD_EXPR: case FLOOR_MOD_EXPR: case ROUND_MOD_EXPR: + /* {s,u}mod_optab implements TRUNC_MOD_EXPR. For scalar modes, +expansion has code to adjust TRUNC_MOD_EXPR into the desired other +modes, but for vector modes it does not. The adjustment code +should be instead emitted in tree-vect-patterns.cc. */ + if (TREE_CODE (type) == VECTOR_TYPE) + return unknown_optab; + /* FALLTHRU */ +case TRUNC_MOD_EXPR: return TYPE_UNSIGNED (type) ? umod_optab : smod_optab; -case RDIV_EXPR: -case TRUNC_DIV_EXPR: case CEIL_DIV_EXPR: case FLOOR_DIV_EXPR: case ROUND_DIV_EXPR: + /* {,u}{s,u}div_optab implements {TRUNC,EXACT}_DIV_EXPR or RDIV_EXPR. +For scalar modes, expansion has code to adjust TRUNC_DIV_EXPR +into the desired other modes, but for vector modes it does not. 
+The adjustment code should be instead emitted in +tree-vect-patterns.cc. */ + if (TREE_CODE (type) == VECTOR_TYPE) + return unknown_optab; + /* FALLTHRU */ +case RDIV_EXPR: +case TRUNC_DIV_EXPR: case EXACT_DIV_EXPR: if (TYPE_SATURATING (type)) return TYPE_UNSIGNED (type) ? usdiv_optab : ssdiv_optab; --- gcc/testsuite/gfortran.dg/pr102860.f90.jj 2022-01-19 10:46:56.712675060 +0100 +++ gcc/testsuite/gfortran.dg/pr102860.f90 2022-01-19 10:46:56.712675060 +0100 @@ -0,0 +1,10 @@ +! PR middle-end/102860 +! { dg-do compile { target { powerpc*-*-* } } } +! { dg-require-effective-target powerpc_vsx_ok } +! { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power10" } } +! { dg-options "-O2 -mcpu=power10" } + +function foo(a) + integer(kind=4) :: a(1024) + a(:)
Re: [PATCH] tree-optimization/104112 - add check for vect epilogue reduc reuse
Richard Biener writes: > This adds a missing check for the availability of intermediate vector > types required to re-use the accumulator of a vectorized reduction > in the vectorized epilogue. For SVE and VNx2DF vs V2DF with > -msve-vector-bits=512 for example V4DF is not available. > > In addition to that we have to verify the reduction operation is > supported, otherwise we for example on i?86 get vector code that's > later decomposed again by vector lowering when trying to use > a V2HI epilogue for a V8HI reduction with a target without > TARGET_MMX_WITH_SSE. > > It might be we want -Wvector-operation-performance for all vect.exp > tests but that seems to have existing regressions. > > Bootstrapped and tested on x86_64-unknown-linux-gnu, OK? LGTM. The earlier patch also passed testing on SVE FWIW. Thanks, Richard > > Thanks, > Richard. > > 2022-01-19 Richard Biener > > PR tree-optimization/104112 > * tree-vect-loop.cc (vect_find_reusable_accumulator): Check > for required intermediate vector types. > > * gcc.dg/vect/pr104112-1.c: New testcase. > * gcc.dg/vect/pr104112-2.c: New testcase. 
> --- > gcc/testsuite/gcc.dg/vect/pr104112-1.c | 18 ++ > gcc/testsuite/gcc.dg/vect/pr104112-2.c | 11 +++ > gcc/tree-vect-loop.cc | 15 ++- > 3 files changed, 43 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/pr104112-1.c > create mode 100644 gcc/testsuite/gcc.dg/vect/pr104112-2.c > > diff --git a/gcc/testsuite/gcc.dg/vect/pr104112-1.c > b/gcc/testsuite/gcc.dg/vect/pr104112-1.c > new file mode 100644 > index 000..84e69b85170 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr104112-1.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-Ofast" } */ > +/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=512" { > target aarch64-*-* } } */ > + > +void > +boom(int n, double *a, double *x) > +{ > + int i, j; > + double temp; > + > + for (j = n; j >= 1; --j) > +{ > + temp = x[j]; > + for (i = j - 1; i >= 1; --i) > + temp += a[i + j] * x[i]; > + x[j] = temp; > +} > +} > diff --git a/gcc/testsuite/gcc.dg/vect/pr104112-2.c > b/gcc/testsuite/gcc.dg/vect/pr104112-2.c > new file mode 100644 > index 000..7469b3c5d84 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr104112-2.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* Diagnose vector ops that are later decomposed. */ > +/* { dg-additional-options "-Wvector-operation-performance" } */ > + > +unsigned short foo (unsigned short *a, int n) > +{ > + unsigned short sum = 0; > + for (int i = 0; i < n; ++i) > +sum += a[i]; > + return sum; > +} > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > index 0fe3529b2d1..0b2785a5ed6 100644 > --- a/gcc/tree-vect-loop.cc > +++ b/gcc/tree-vect-loop.cc > @@ -4979,9 +4979,22 @@ vect_find_reusable_accumulator (loop_vec_info > loop_vinfo, >/* Handle the case where we can reduce wider vectors to narrower ones. 
*/ >tree vectype = STMT_VINFO_VECTYPE (reduc_info); >tree old_vectype = TREE_TYPE (accumulator->reduc_input); > + unsigned HOST_WIDE_INT m; >if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype), > - TYPE_VECTOR_SUBPARTS (vectype))) > + TYPE_VECTOR_SUBPARTS (vectype), &m)) > return false; > + /* Check the intermediate vector types are available. */ > + while (m > 2) > +{ > + m /= 2; > + tree intermediate_vectype = get_related_vectype_for_scalar_type > + (TYPE_MODE (vectype), TREE_TYPE (vectype), > + exact_div (TYPE_VECTOR_SUBPARTS (old_vectype), m)); > + if (!intermediate_vectype > + || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info), > + intermediate_vectype)) > + return false; > +} > >/* Non-SLP reductions might apply an adjustment after the reduction > operation, in order to simplify the initialization of the accumulator.
Re: [PATCH] Update per-file selftest and finalization hooks for .c to .cc renaming
On Wed, Jan 19, 2022 at 2:45 PM David Malcolm via Gcc-patches wrote: > > This is mostly a mechanical change, apart from: > - fix the name of opt_proposer_c to match its filename (opt-suggestions.cc) > - delete a bogus "modref_c_tests" decl from ipa-modref-tree.h that's been > present since the initial commit of that file > (d119f34c952f8718fdbabc63e2f369a16e92fa07) > > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. > > OK for trunk? (arguably it's an "obvious" followup to the .c to .cc > renaming, but we're in stage 4; I think it makes sense to do it now, > as it seems logically connected to the renaming, and low-risk) OK. > gcc/ChangeLog: > * attribs.cc (attribute_c_tests): Rename to... > (attribute_cc_tests): ...this. > * bitmap.cc (bitmap_c_tests): Rename to... > (bitmap_cc_tests): ...this. > * cgraph.cc (cgraph_c_finalize): Rename to... > (cgraph_cc_finalize): ...this. > (cgraph_c_tests): Rename to... > (cgraph_cc_tests): ...this. > * cgraph.h (cgraph_c_finalize): Rename to... > (cgraph_cc_finalize): ...this. > (cgraphunit_c_finalize): Rename to... > (cgraphunit_cc_finalize): ...this. > * cgraphunit.cc (cgraphunit_c_finalize): Rename to... > (cgraphunit_cc_finalize): ...this. > * convert.cc (convert_c_tests): Rename to... > (convert_cc_tests): ...this. > * dbgcnt.cc (dbgcnt_c_tests): Rename to... > (dbgcnt_cc_tests): ...this. > * diagnostic-show-locus.cc (diagnostic_show_locus_c_tests): Rename > to... > (diagnostic_show_locus_cc_tests): ...this. > * diagnostic.cc (diagnostic_c_tests): Rename to... > (diagnostic_cc_tests): ...this. > * dumpfile.cc (dumpfile_c_tests): Rename to... > (dumpfile_cc_tests): ...this. > * dwarf2out.cc (dwarf2out_c_finalize): Rename to... > (dwarf2out_cc_finalize): ...this. > * dwarf2out.h (dwarf2out_c_finalize): Rename to... > (dwarf2out_cc_finalize): ...this. > * edit-context.cc (edit_context_c_tests): Rename to... > (edit_context_cc_tests): ...this. > * et-forest.cc (et_forest_c_tests): Rename to... 
> (et_forest_cc_tests): ...this. > * fibonacci_heap.cc (fibonacci_heap_c_tests): Rename to... > (fibonacci_heap_cc_tests): ...this. > * fold-const.cc (fold_const_c_tests): Rename to... > (fold_const_cc_tests): ...this. > * function-tests.cc (function_tests_c_tests): Rename to... > (function_tests_cc_tests): ...this. > * gcse.cc (gcse_c_finalize): Rename to... > (gcse_cc_finalize): ...this. > * gcse.h (gcse_c_finalize): Rename to... > (gcse_cc_finalize): ...this. > * ggc-tests.cc (ggc_tests_c_tests): Rename to... > (ggc_tests_cc_tests): ...this. > * gimple-ssa-store-merging.cc (store_merging_c_tests): Rename to... > (store_merging_cc_tests): ...this. > * gimple.cc (gimple_c_tests): Rename to... > (gimple_cc_tests): ...this. > * hash-map-tests.cc (hash_map_tests_c_tests): Rename to... > (hash_map_tests_cc_tests): ...this. > * hash-set-tests.cc (hash_set_tests_c_tests): Rename to... > (hash_set_tests_cc_tests): ...this. > * input.cc (input_c_tests): Rename to... > (input_cc_tests): ...this. > * ipa-cp.cc (ipa_cp_c_finalize): Rename to... > (ipa_cp_cc_finalize): ...this. > * ipa-fnsummary.cc (ipa_fnsummary_c_finalize): Rename to... > (ipa_fnsummary_cc_finalize): ...this. > * ipa-fnsummary.h (ipa_fnsummary_c_finalize): Rename to... > (ipa_fnsummary_cc_finalize): ...this. > * ipa-modref-tree.cc (ipa_modref_tree_c_tests): Rename to... > (ipa_modref_tree_cc_tests): ...this. > * ipa-modref-tree.h (modref_c_tests): Delete bogus decl. > * ipa-modref.cc (ipa_modref_c_finalize): Rename to... > (ipa_modref_cc_finalize): ...this. > * ipa-modref.h (ipa_modref_c_finalize): Rename to... > (ipa_modref_cc_finalize): ...this. > * ipa-prop.h (ipa_cp_c_finalize): Rename to... > (ipa_cp_cc_finalize): ...this. > * ipa-reference.cc (ipa_reference_c_finalize): Rename to... > (ipa_reference_cc_finalize): ...this. > * ipa-reference.h (ipa_reference_c_finalize): Rename to... > (ipa_reference_cc_finalize): ...this. > * ira-costs.cc (ira_costs_c_finalize): Rename to... 
> (ira_costs_cc_finalize): ...this. > * ira.h (ira_costs_c_finalize): Rename to... > (ira_costs_cc_finalize): ...this. > * opt-suggestions.cc (opt_proposer_c_tests): Rename to... > (opt_suggestions_cc_tests): ...this. > * opts.cc (opts_c_tests): Rename to... > (opts_cc_tests): ...this. > * predict.cc (predict_c_tests): Rename to... > (predict_cc_tests): ...this. >
[PATCH] Update per-file selftest and finalization hooks for .c to .cc renaming
This is mostly a mechanical change, apart from: - fix the name of opt_proposer_c to match its filename (opt-suggestions.cc) - delete a bogus "modref_c_tests" decl from ipa-modref-tree.h that's been present since the initial commit of that file (d119f34c952f8718fdbabc63e2f369a16e92fa07) Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. OK for trunk? (arguably it's an "obvious" followup to the .c to .cc renaming, but we're in stage 4; I think it makes sense to do it now, as it seems logically connected to the renaming, and low-risk) gcc/ChangeLog: * attribs.cc (attribute_c_tests): Rename to... (attribute_cc_tests): ...this. * bitmap.cc (bitmap_c_tests): Rename to... (bitmap_cc_tests): ...this. * cgraph.cc (cgraph_c_finalize): Rename to... (cgraph_cc_finalize): ...this. (cgraph_c_tests): Rename to... (cgraph_cc_tests): ...this. * cgraph.h (cgraph_c_finalize): Rename to... (cgraph_cc_finalize): ...this. (cgraphunit_c_finalize): Rename to... (cgraphunit_cc_finalize): ...this. * cgraphunit.cc (cgraphunit_c_finalize): Rename to... (cgraphunit_cc_finalize): ...this. * convert.cc (convert_c_tests): Rename to... (convert_cc_tests): ...this. * dbgcnt.cc (dbgcnt_c_tests): Rename to... (dbgcnt_cc_tests): ...this. * diagnostic-show-locus.cc (diagnostic_show_locus_c_tests): Rename to... (diagnostic_show_locus_cc_tests): ...this. * diagnostic.cc (diagnostic_c_tests): Rename to... (diagnostic_cc_tests): ...this. * dumpfile.cc (dumpfile_c_tests): Rename to... (dumpfile_cc_tests): ...this. * dwarf2out.cc (dwarf2out_c_finalize): Rename to... (dwarf2out_cc_finalize): ...this. * dwarf2out.h (dwarf2out_c_finalize): Rename to... (dwarf2out_cc_finalize): ...this. * edit-context.cc (edit_context_c_tests): Rename to... (edit_context_cc_tests): ...this. * et-forest.cc (et_forest_c_tests): Rename to... (et_forest_cc_tests): ...this. * fibonacci_heap.cc (fibonacci_heap_c_tests): Rename to... (fibonacci_heap_cc_tests): ...this. * fold-const.cc (fold_const_c_tests): Rename to... 
(fold_const_cc_tests): ...this. * function-tests.cc (function_tests_c_tests): Rename to... (function_tests_cc_tests): ...this. * gcse.cc (gcse_c_finalize): Rename to... (gcse_cc_finalize): ...this. * gcse.h (gcse_c_finalize): Rename to... (gcse_cc_finalize): ...this. * ggc-tests.cc (ggc_tests_c_tests): Rename to... (ggc_tests_cc_tests): ...this. * gimple-ssa-store-merging.cc (store_merging_c_tests): Rename to... (store_merging_cc_tests): ...this. * gimple.cc (gimple_c_tests): Rename to... (gimple_cc_tests): ...this. * hash-map-tests.cc (hash_map_tests_c_tests): Rename to... (hash_map_tests_cc_tests): ...this. * hash-set-tests.cc (hash_set_tests_c_tests): Rename to... (hash_set_tests_cc_tests): ...this. * input.cc (input_c_tests): Rename to... (input_cc_tests): ...this. * ipa-cp.cc (ipa_cp_c_finalize): Rename to... (ipa_cp_cc_finalize): ...this. * ipa-fnsummary.cc (ipa_fnsummary_c_finalize): Rename to... (ipa_fnsummary_cc_finalize): ...this. * ipa-fnsummary.h (ipa_fnsummary_c_finalize): Rename to... (ipa_fnsummary_cc_finalize): ...this. * ipa-modref-tree.cc (ipa_modref_tree_c_tests): Rename to... (ipa_modref_tree_cc_tests): ...this. * ipa-modref-tree.h (modref_c_tests): Delete bogus decl. * ipa-modref.cc (ipa_modref_c_finalize): Rename to... (ipa_modref_cc_finalize): ...this. * ipa-modref.h (ipa_modref_c_finalize): Rename to... (ipa_modref_cc_finalize): ...this. * ipa-prop.h (ipa_cp_c_finalize): Rename to... (ipa_cp_cc_finalize): ...this. * ipa-reference.cc (ipa_reference_c_finalize): Rename to... (ipa_reference_cc_finalize): ...this. * ipa-reference.h (ipa_reference_c_finalize): Rename to... (ipa_reference_cc_finalize): ...this. * ira-costs.cc (ira_costs_c_finalize): Rename to... (ira_costs_cc_finalize): ...this. * ira.h (ira_costs_c_finalize): Rename to... (ira_costs_cc_finalize): ...this. * opt-suggestions.cc (opt_proposer_c_tests): Rename to... (opt_suggestions_cc_tests): ...this. * opts.cc (opts_c_tests): Rename to... (opts_cc_tests): ...this. 
* predict.cc (predict_c_tests): Rename to... (predict_cc_tests): ...this. * pretty-print.cc (pretty_print_c_tests): Rename to... (pretty_print_cc_tests): ...this. * read-rtl-function.cc (read_rtl_function_c_tests): Rename to... (read_rtl_function_cc_tests): ...this. * rtl-tests.cc (rtl_tests_c_tests): Rename
[PATCH] tree-optimization/104112 - add check for vect epilogue reduc reuse
This adds a missing check for the availability of intermediate vector types required to re-use the accumulator of a vectorized reduction in the vectorized epilogue. For SVE and VNx2DF vs V2DF with -msve-vector-bits=512 for example V4DF is not available. In addition to that we have to verify the reduction operation is supported, otherwise we for example on i?86 get vector code that's later decomposed again by vector lowering when trying to use a V2HI epilogue for a V8HI reduction with a target without TARGET_MMX_WITH_SSE. It might be we want -Wvector-operation-performance for all vect.exp tests but that seems to have existing regressions. Bootstrapped and tested on x86_64-unknown-linux-gnu, OK? Thanks, Richard. 2022-01-19 Richard Biener PR tree-optimization/104112 * tree-vect-loop.cc (vect_find_reusable_accumulator): Check for required intermediate vector types. * gcc.dg/vect/pr104112-1.c: New testcase. * gcc.dg/vect/pr104112-2.c: New testcase. --- gcc/testsuite/gcc.dg/vect/pr104112-1.c | 18 ++ gcc/testsuite/gcc.dg/vect/pr104112-2.c | 11 +++ gcc/tree-vect-loop.cc | 15 ++- 3 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr104112-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr104112-2.c diff --git a/gcc/testsuite/gcc.dg/vect/pr104112-1.c b/gcc/testsuite/gcc.dg/vect/pr104112-1.c new file mode 100644 index 000..84e69b85170 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104112-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast" } */ +/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=512" { target aarch64-*-* } } */ + +void +boom(int n, double *a, double *x) +{ + int i, j; + double temp; + + for (j = n; j >= 1; --j) +{ + temp = x[j]; + for (i = j - 1; i >= 1; --i) + temp += a[i + j] * x[i]; + x[j] = temp; +} +} diff --git a/gcc/testsuite/gcc.dg/vect/pr104112-2.c b/gcc/testsuite/gcc.dg/vect/pr104112-2.c new file mode 100644 index 000..7469b3c5d84 --- /dev/null +++ 
b/gcc/testsuite/gcc.dg/vect/pr104112-2.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* Diagnose vector ops that are later decomposed. */ +/* { dg-additional-options "-Wvector-operation-performance" } */ + +unsigned short foo (unsigned short *a, int n) +{ + unsigned short sum = 0; + for (int i = 0; i < n; ++i) +sum += a[i]; + return sum; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 0fe3529b2d1..0b2785a5ed6 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -4979,9 +4979,22 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo, /* Handle the case where we can reduce wider vectors to narrower ones. */ tree vectype = STMT_VINFO_VECTYPE (reduc_info); tree old_vectype = TREE_TYPE (accumulator->reduc_input); + unsigned HOST_WIDE_INT m; if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype), - TYPE_VECTOR_SUBPARTS (vectype))) + TYPE_VECTOR_SUBPARTS (vectype), &m)) return false; + /* Check the intermediate vector types are available. */ + while (m > 2) +{ + m /= 2; + tree intermediate_vectype = get_related_vectype_for_scalar_type + (TYPE_MODE (vectype), TREE_TYPE (vectype), +exact_div (TYPE_VECTOR_SUBPARTS (old_vectype), m)); + if (!intermediate_vectype + || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info), + intermediate_vectype)) + return false; +} /* Non-SLP reductions might apply an adjustment after the reduction operation, in order to simplify the initialization of the accumulator. -- 2.31.1
Re: [PATCH 2/2] [Ada] Set target_cpu to x32 for x86_64-linux-gnux32
> > OK, thanks. > > OK for backports? Yes.
Re: [PATCH 1/2] [Ada] Compile s-mmap and 128bit on x86_64-linux-gnux32
> OK for backports? Yes.
Re: [PATCH 2/2] [Ada] Set target_cpu to x32 for x86_64-linux-gnux32
On Wed, Jan 19, 2022 at 1:06 AM Arnaud Charlet wrote: > > OK, thanks. OK for backports? Thanks. > > Since the x86_64-linux-gnux32 compiler is actually an x32 compiler, set > > target_cpu to x32 for x86_64-linux-gnux32. > > > > PR ada/103538 > > * gcc-interface/Makefile.in (target_cpu): Set to x32 for > > x86_64-linux-gnux32. > > --- > > gcc/ada/gcc-interface/Makefile.in | 7 +++ > > 1 file changed, 7 insertions(+) > > > > diff --git a/gcc/ada/gcc-interface/Makefile.in > > b/gcc/ada/gcc-interface/Makefile.in > > index 53d0739470a..b8a24708280 100644 > > --- a/gcc/ada/gcc-interface/Makefile.in > > +++ b/gcc/ada/gcc-interface/Makefile.in > > @@ -350,6 +350,13 @@ ifeq ($(strip $(filter-out x86_64, $(target_cpu))),) > >endif > > endif > > > > +# The x86_64-linux-gnux32 compiler is actually an x32 compiler > > +ifeq ($(strip $(filter-out x86_64 linux-gnux32%, $(target_cpu) > > $(target_os))),) > > + ifneq ($(strip $(MULTISUBDIR)),/64) > > +target_cpu:=x32 > > + endif > > +endif > > + > > # The SuSE PowerPC64/Linux compiler is actually a 32-bit PowerPC compiler > > ifeq ($(strip $(filter-out powerpc64 suse linux%, $(target_cpu) > > $(target_vendor) $(target_os))),) > >target_cpu:=powerpc > > -- > > 2.34.1 > > -- H.J.
Re: [PATCH 1/2] [Ada] Compile s-mmap and 128bit on x86_64-linux-gnux32
On Wed, Jan 19, 2022 at 1:06 AM Arnaud Charlet wrote: > > OK, thanks. OK for backports? Thanks. > > PR ada/103538 > > * Makefile.rtl (LIBGNAT_TARGET_PAIRS): Add > > $(TRASYM_DWARF_UNIX_PAIRS), > > s-tsmona.adb > $(GNATRTL_128BIT_PAIRS). > > (EXTRA_GNATRTL_NONTASKING_OBJS): Add $(TRASYM_DWARF_UNIX_OBJS) > > and $(GNATRTL_128BIT_OBJS). > > --- > > gcc/ada/Makefile.rtl | 5 + > > 1 file changed, 5 insertions(+) > > > > diff --git a/gcc/ada/Makefile.rtl b/gcc/ada/Makefile.rtl > > index 1b066ad6b14..6d60aea75a8 100644 > > --- a/gcc/ada/Makefile.rtl > > +++ b/gcc/ada/Makefile.rtl > > @@ -2650,13 +2650,18 @@ ifeq ($(strip $(filter-out %x32 > > linux%,$(target_cpu) $(target_os))),) > >s-tasinf.adb >s-tpopsp.adb >s-taspri.ads > + $(TRASYM_DWARF_UNIX_PAIRS) \ > > + s-tsmona.adb >$(ATOMICS_TARGET_PAIRS) \ > >$(X86_64_TARGET_PAIRS) \ > > + $(GNATRTL_128BIT_PAIRS) \ > >system.ads > > >TOOLS_TARGET_PAIRS = indepsw.adb > > >EXTRA_GNATRTL_NONTASKING_OBJS=g-sse.o g-ssvety.o > > + EXTRA_GNATRTL_NONTASKING_OBJS+=$(TRASYM_DWARF_UNIX_OBJS) > > + EXTRA_GNATRTL_NONTASKING_OBJS+=$(GNATRTL_128BIT_OBJS) > >EXTRA_GNATRTL_TASKING_OBJS=s-linux.o a-exetim.o > >EH_MECHANISM=-gcc > >THREADSLIB=-lpthread -lrt > > -- > > 2.34.1 > > -- H.J.
Re: [RFC] Port git gcc-descr to Python
On 1/19/22 13:49, Martin Jambor wrote: Hi, On Wed, Jan 19 2022, Martin Liška wrote: On 10/18/21 11:01, Martin Liška wrote: On 10/12/21 10:59, Martin Liška wrote: Hello. There's a complete patch that implements both git gcc-descr and gcc-undesrc and sets corresponding git aliases to use them. Ready to be installed? Thanks, Martin All right, so Jakub told me at IRC that we doesn't support porting to Python. However, he promised supporting the changes I made in the original shell script. @Jakub: May I remind this, please? putting the following line in ..git/config seems to do the trick? gcc-descr = "!f() { if test ${1:-no} = --short; then c=${2:-master}; r=$(git describe --all --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)-\\([0-9]\\+\\)-g[0-9a-f]*$,r\\2-\\3,p;s,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)$,r\\2-0,p'); elif test ${1:-no} = --full; then c=${2:-master}; r=$(git describe --all --abbrev=40 --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); else c=${1:-master}; r=$(git describe --all --abbrev=14 --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); expr match ${r:-no} '^r[0-9]\\+$' >/dev/null && r=${r}-0-g$(git rev-parse ${2:-master}); fi; if test -n $r; then o=$(git config --get gcc-config.upstream); rr=$(echo $r | sed -n 's,^r\\([0-9]\\+\\)-[0-9]\\+\\(-g[0-9a-f]\\+\\)\\?$,\\1,p'); if git rev-parse --verify --quiet ${o:-origin}/releases/gcc-$rr >/dev/null; then m=releases/gcc-$rr; else m=master; fi; git merge-base --is-ancestor $c ${o:-origin}/$m && \\echo ${r}; fi; }; f" Derived from the following before squashing all the new lines: if test ${1:-no} = --short; then c=${2:-master}; r=$(git describe --all --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)-\\([0-9]\\+\\)-g[0-9a-f]*$,r\\2-\\3,p;s,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)$,r\\2-0,p'); elif test ${1:-no} = --long; then c=${2:-master}; r=$(git 
describe --all --abbrev=40 --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); else c=${1:-master}; r=$(git describe --all --abbrev=14 --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); expr match ${r:-no} '^r[0-9]\\+$' >/dev/null && r=${r}-0-g$(git rev-parse ${2:-master}); fi; if test -n $r; then o=$(git config --get gcc-config.upstream); rr=$(echo $r | sed -n 's,^r\\([0-9]\\+\\)-[0-9]\\+\\(-g[0-9a-f]\\+\\)\\?$,\\1,p'); if git rev-parse --verify --quiet ${o:-origin}/releases/gcc-$rr >/dev/null; then m=releases/gcc-$rr; else m=master; fi; git merge-base --is-ancestor $c ${o:-origin}/$m && \\echo ${r}; fi; Anyway, can we please put the nicely formatted bash script to contrib/git-describe.sh (contrib/git-undescribe.sh), having that squashed to one line is just crazy. Martin, can you please do that? (And if you wanted to ask then no, I am not touching contrib/gcc-git-customization.sh, not even with a long pole, it scares me :-) Sure, I can then adjust it if we end up with an outlined version where to code sits in proper .sh files. Martin Martin
[PATCH] tree-optimization/104114 - avoid diagnosing V1mode lowering
Currently we diagnose vector lowering of V1mode operations that are not natively supported into V_C_E, scalar op plus CTOR with -Wvector-operation-performance but that's hardly useful behavior even though the way we lower things can be improved. The following disables the diagnostics for the cases the vect.exp testsuite runs into, on x86 that are vect-cond-11.c and vect-singleton_1.c. Bootstrap / regtest pending on x86_64-unknown-linux-gnu. Comments? Thanks, Richard. 2022-01-19 Richard Biener PR tree-optimization/104114 * tree-vect-generic.c (expand_vector_piecewise): Do not diagnose single element vector decomposition. --- gcc/tree-vect-generic.cc | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index 5814a71a5bb..f4fc7d16119 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -317,7 +317,10 @@ expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, int i; location_t loc = gimple_location (gsi_stmt (*gsi)); - if (ret_type || !parallel_p) + if (nunits == 1) +/* Do not diagnose decomposing single element vectors. */ +; + else if (ret_type || !parallel_p) warning_at (loc, OPT_Wvector_operation_performance, "vector operation will be expanded piecewise"); else -- 2.31.1
Re: [RFC] Port git gcc-descr to Python
Hi, On Wed, Jan 19 2022, Martin Liška wrote: > On 10/18/21 11:01, Martin Liška wrote: >> On 10/12/21 10:59, Martin Liška wrote: >>> Hello. >>> >>> There's a complete patch that implements both git gcc-descr and gcc-undesrc >>> and sets corresponding git aliases to use them. >>> >>> Ready to be installed? >>> Thanks, >>> Martin >> >> All right, so Jakub told me at IRC that we doesn't support porting to Python. >> However, he promised supporting the changes I made in the original shell >> script. > > @Jakub: May I remind this, please? putting the following line in ..git/config seems to do the trick? gcc-descr = "!f() { if test ${1:-no} = --short; then c=${2:-master}; r=$(git describe --all --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)-\\([0-9]\\+\\)-g[0-9a-f]*$,r\\2-\\3,p;s,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)$,r\\2-0,p'); elif test ${1:-no} = --full; then c=${2:-master}; r=$(git describe --all --abbrev=40 --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); else c=${1:-master}; r=$(git describe --all --abbrev=14 --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); expr match ${r:-no} '^r[0-9]\\+$' >/dev/null && r=${r}-0-g$(git rev-parse ${2:-master}); fi; if test -n $r; then o=$(git config --get gcc-config.upstream); rr=$(echo $r | sed -n 's,^r\\([0-9]\\+\\)-[0-9]\\+\\(-g[0-9a-f]\\+\\)\\?$,\\1,p'); if git rev-parse --verify --quiet ${o:-origin}/releases/gcc-$rr >/dev/null; then m=releases/gcc-$rr; else m=master; fi; git merge-base --is-ancestor $c ${o:-origin}/$m && \\echo ${r}; fi; }; f" Derived from the following before squashing all the new lines: if test ${1:-no} = --short; then c=${2:-master}; r=$(git describe --all --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)-\\([0-9]\\+\\)-g[0-9a-f]*$,r\\2-\\3,p;s,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)$,r\\2-0,p'); elif test ${1:-no} = --long; then 
c=${2:-master}; r=$(git describe --all --abbrev=40 --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); else c=${1:-master}; r=$(git describe --all --abbrev=14 --match 'basepoints/gcc-[0-9]*' $c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); expr match ${r:-no} '^r[0-9]\\+$' >/dev/null && r=${r}-0-g$(git rev-parse ${2:-master}); fi; if test -n $r; then o=$(git config --get gcc-config.upstream); rr=$(echo $r | sed -n 's,^r\\([0-9]\\+\\)-[0-9]\\+\\(-g[0-9a-f]\\+\\)\\?$,\\1,p'); if git rev-parse --verify --quiet ${o:-origin}/releases/gcc-$rr >/dev/null; then m=releases/gcc-$rr; else m=master; fi; git merge-base --is-ancestor $c ${o:-origin}/$m && \\echo ${r}; fi; (And if you wanted to ask then no, I am not touching contrib/gcc-git-customization.sh, not even with a long pole, it scares me :-) Martin
Re: [PATCH] libstdc++: Fix for non-constexpr math_errhandling
On Wednesday, 19 January 2022 13:07:26 CET Jonathan Wakely wrote: > On Wed, 19 Jan 2022 at 08:10, Matthias Kretz wrote: > > Follow-up to my last patch. This one is a more thorough fix. Tested on > > x86_64- > > linux. OK for trunk? > > > > 8< > > > > Use SFINAE magic to support: "It is unspecified whether math_errhandling > > is a macro or an identifier with external linkage." [C Standard] > > The patch is OK for trunk, but I don't understand what the C standard means > here. > > "the macro math_errhandling expands to [...]. It is unspecified whether > math_errhandling is a macro or an identifier with external linkage." > > So is it a macro or not? I agree the quote I used is unclear. The complete paragraph: The macros MATH_ERRNO MATH_ERREXCEPT expand to the integer constants 1 and 2, respectively; the macro math_errhandling expands to an expression that has type int and the value MATH_ERRNO, MATH_ERREXCEPT, or the bitwise OR of both. The value of math_errhandling is constant for the duration of the program. It is unspecified whether math_errhandling is a macro or an identifier with external linkage. If a macro definition is suppressed or a program defines an identifier with the name math_errhandling, the behavior is undefined. If the expression math_errhandling & MATH_ERREXCEPT can be nonzero, the implementation shall define the macros FE_DIVBYZERO, FE_INVALID, and FE_OVERFLOW in <fenv.h>. -- ── Dr. Matthias Kretz https://mattkretz.github.io GSI Helmholtz Centre for Heavy Ion Research https://gsi.de stdₓ::simd ──
Re: [PATCH] libstdc++: Fix for non-constexpr math_errhandling
On Wed, 19 Jan 2022 at 08:10, Matthias Kretz wrote: > Follow-up to my last patch. This one is a more thorough fix. Tested on > x86_64- > linux. OK for trunk? > > 8< > > Use SFINAE magic to support: "It is unspecified whether math_errhandling > is a macro or an identifier with external linkage." [C Standard] > The patch is OK for trunk, but I don't understand what the C standard means here. "the macro math_errhandling expands to [...]. It is unspecified whether math_errhandling is a macro or an identifier with external linkage." So is it a macro or not?
[PATCH][pushed] testsuite: Rename a few C++ to .C or .cc.
The tests are C++ code, so use a proper file extension. gcc/testsuite/ChangeLog: * g++.dg/ext/boolcomplex-1.c: Moved to... * g++.dg/ext/boolcomplex-1.C: ...here. * g++.dg/opt/pr47639.c: Moved to... * g++.dg/opt/pr47639.C: ...here. * g++.dg/pr83979.c: Moved to... * g++.dg/pr83979.C: ...here. * g++.dg/tm/asm-1.c: Moved to... * g++.dg/tm/asm-1.C: ...here. * g++.dg/vect/pr71483.c: Moved to... * g++.dg/vect/pr71483.cc: ...here. --- gcc/testsuite/g++.dg/ext/{boolcomplex-1.c => boolcomplex-1.C} | 0 gcc/testsuite/g++.dg/opt/{pr47639.c => pr47639.C} | 0 gcc/testsuite/g++.dg/{pr83979.c => pr83979.C} | 0 gcc/testsuite/g++.dg/tm/{asm-1.c => asm-1.C} | 0 gcc/testsuite/g++.dg/vect/{pr71483.c => pr71483.cc} | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename gcc/testsuite/g++.dg/ext/{boolcomplex-1.c => boolcomplex-1.C} (100%) rename gcc/testsuite/g++.dg/opt/{pr47639.c => pr47639.C} (100%) rename gcc/testsuite/g++.dg/{pr83979.c => pr83979.C} (100%) rename gcc/testsuite/g++.dg/tm/{asm-1.c => asm-1.C} (100%) rename gcc/testsuite/g++.dg/vect/{pr71483.c => pr71483.cc} (100%) diff --git a/gcc/testsuite/g++.dg/ext/boolcomplex-1.c b/gcc/testsuite/g++.dg/ext/boolcomplex-1.C similarity index 100% rename from gcc/testsuite/g++.dg/ext/boolcomplex-1.c rename to gcc/testsuite/g++.dg/ext/boolcomplex-1.C diff --git a/gcc/testsuite/g++.dg/opt/pr47639.c b/gcc/testsuite/g++.dg/opt/pr47639.C similarity index 100% rename from gcc/testsuite/g++.dg/opt/pr47639.c rename to gcc/testsuite/g++.dg/opt/pr47639.C diff --git a/gcc/testsuite/g++.dg/pr83979.c b/gcc/testsuite/g++.dg/pr83979.C similarity index 100% rename from gcc/testsuite/g++.dg/pr83979.c rename to gcc/testsuite/g++.dg/pr83979.C diff --git a/gcc/testsuite/g++.dg/tm/asm-1.c b/gcc/testsuite/g++.dg/tm/asm-1.C similarity index 100% rename from gcc/testsuite/g++.dg/tm/asm-1.c rename to gcc/testsuite/g++.dg/tm/asm-1.C diff --git a/gcc/testsuite/g++.dg/vect/pr71483.c b/gcc/testsuite/g++.dg/vect/pr71483.cc similarity index 100% rename from 
gcc/testsuite/g++.dg/vect/pr71483.c rename to gcc/testsuite/g++.dg/vect/pr71483.cc -- 2.34.1
[committed] rs6000: Fix bootstrap
On Wed, Jan 19, 2022 at 07:54:19AM +0100, Sebastian Huber wrote: > On 18/01/2022 22:42, Segher Boessenkool wrote: > > > +default: > > > + break; > > Please don't do that. You can do > > > >default: > > break; > > break; > > /* And just to make sure: */ > > break; > > break; > > > > and it will do exactly the same as not having a default at all. Not > > having such useless code is by far the most readable, so please don't > > include a default case at all. > > I removed the default case. I hope this is what you wanted. Unfortunately the removal of default: break; breaks bootstrap: ../../gcc/config/rs6000/rs6000.cc: In function ‘const char* rs6000_machine_from_flags()’: ../../gcc/config/rs6000/rs6000.cc:5940:10: error: enumeration value ‘PROCESSOR_PPC601’ not handled in switch [-Werror=switch] 5940 | switch (rs6000_cpu) | ^ ../../gcc/config/rs6000/rs6000.cc:5940:10: error: enumeration value ‘PROCESSOR_PPC603’ not handled in switch [-Werror=switch] ... default: break; is needed to tell the -Wswitch warning that it is intentional that not all enumerators are handled in the switch. I've committed following as obvious to unbreak the bootstrap. 2022-01-19 Jakub Jelinek * config/rs6000/rs6000.cc (rs6000_machine_from_flags): Add default:. --- gcc/config/rs6000/rs6000.cc.jj +++ gcc/config/rs6000/rs6000.cc @@ -5958,6 +5958,9 @@ rs6000_machine_from_flags (void) case PROCESSOR_PPCE6500: return "e6500"; + +default: + break; } HOST_WIDE_INT flags = rs6000_isa_flags; Jakub
Re: Enable -Werror=format-diag for bootstrap
On 1/18/22 17:28, Martin Liška wrote: On 1/16/22 01:31, Martin Sebor wrote: Hello. (I think your message is unrelated to this email thread). Martin, I've looked into removing the -Wno-error for this warning for just a subset of targets. It seems doable with some hardcoding in configure.ac but if you're planning to do the cleanup for all of them I'm wondering if we should even bother. What do you think? Yes, I've just installed patches that should address all -Wformat-diag errors for all targets we have. Please enable the corresponding -Werror for bootstrap. I've got a patch that enables that. Right now, there should not be any -Wformat-diag in any target. Thus I'm going to install the following patch. Martin Thanks, Martin Martin From 54c453980fb2b04f6605dc8d50e3472e37a3e629 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Wed, 19 Jan 2022 11:54:59 +0100 Subject: [PATCH] Enable -Werror=format-diag during bootstrap. gcc/ChangeLog: * configure.ac: Remove -Wno-error=format-diag. * configure: Regenerate. 
--- gcc/configure| 4 ++-- gcc/configure.ac | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gcc/configure b/gcc/configure index fd7227078e4..3b228c3d9dc 100755 --- a/gcc/configure +++ b/gcc/configure @@ -7089,7 +7089,7 @@ ac_compiler_gnu=$ac_cv_cxx_compiler_gnu loose_warn= save_CXXFLAGS="$CXXFLAGS" -for real_option in -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual -Wno-error=format-diag $wf_opt; do +for real_option in -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual $wf_opt; do # Do the check with the no- prefix removed since gcc silently # accepts any -Wno-* option on purpose case $real_option in @@ -7147,7 +7147,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu c_loose_warn= save_CFLAGS="$CFLAGS" -for real_option in -Wstrict-prototypes -Wmissing-prototypes -Wno-error=format-diag; do +for real_option in -Wstrict-prototypes -Wmissing-prototypes ; do # Do the check with the no- prefix removed since gcc silently # accepts any -Wno-* option on purpose case $real_option in diff --git a/gcc/configure.ac b/gcc/configure.ac index 40cfd38fe56..472d1c8c341 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -573,11 +573,11 @@ AS_IF([test $enable_build_format_warnings = no], [wf_opt=-Wno-format],[wf_opt=]) ACX_PROG_CXX_WARNING_OPTS( m4_quote(m4_do([-W -Wall -Wno-narrowing -Wwrite-strings ], - [-Wcast-qual -Wno-error=format-diag $wf_opt])), + [-Wcast-qual $wf_opt])), [loose_warn]) ACX_PROG_CC_WARNING_OPTS( m4_quote(m4_do([-Wstrict-prototypes -Wmissing-prototypes ], - [-Wno-error=format-diag])), [c_loose_warn]) + [])), [c_loose_warn]) ACX_PROG_CXX_WARNING_OPTS( m4_quote(m4_do([-Wmissing-format-attribute ], [-Woverloaded-virtual])), [strict_warn]) -- 2.34.1
Re: [vect] PR103997: Fix epilogue mode skipping
On Tue, 18 Jan 2022, Andre Vieira (lists) wrote: > > On 14/01/2022 09:57, Richard Biener wrote: > > > > The 'used_vector_modes' is also a heuristic by itself since it registers > > every vector type we query, not only those that are used in the end ... > > > > So it's really all heuristics that can eventually go bad. > > > > IMHO remembering the VF that we ended up with (maybe w/o unrolling) > > for each analyzed vector_mode[] might be really the easiest thing to do, > > that should make it easy to skip those modes where the VF is larger > > or equal as the VF of the main loop for the purpose of epilogue > > vectorization. Likewise those vector_mode[] that failed analysis can > > be remembered (with -1U VF for example). > > > > Richard. > > I liked the caching suggestion, so here it is. Sorry for the delay, wanted to > post this after pushing the vect unroll which was waiting on some retesting > for the rebase. LGTM. Thanks, Richard. > gcc/ChangeLog: > > PR 103997 > * tree-vect-loop.c (vect_analyze_loop): Fix mode skipping for > epilogue > vectorization. >
Re: [RFC] Port git gcc-descr to Python
On 10/18/21 11:01, Martin Liška wrote: On 10/12/21 10:59, Martin Liška wrote: Hello. There's a complete patch that implements both git gcc-descr and gcc-undescr and sets corresponding git aliases to use them. Ready to be installed? Thanks, Martin All right, so Jakub told me at IRC that we don't support porting to Python. However, he promised supporting the changes I made in the original shell script. @Jakub: May I remind this, please? Thanks, Martin Cheers, Martin
Re: [PATCH] waccess: Look at calls when tracking clobbers [PR104092]
Richard Biener writes: > On Tue, Jan 18, 2022 at 2:40 PM Richard Sandiford via Gcc-patches > wrote: >> >> In this PR the waccess pass was fed: >> >> D.10779 ={v} {CLOBBER}; >> VIEW_CONVERT_EXPR(D.10779) = .MASK_LOAD_LANES (addr_5(D), >> 64B, _2); >> _7 = D.10779.__val[0]; >> >> However, the tracking of m_clobbers only looked at gassigns, >> so it missed that the clobber on the first line was overwritten >> by the call on the second line. > > Just as a note another possible def can come via asm() outputs > and clobbers. There would have been walk_stmt_load_store_ops > to track all those down (not sure if the function is a good fit here). Hmm. Looking at what the pass is doing in more detail, I'm not sure this approach to handling m_clobbers is safe. The pass walks the blocks in sequence (rather than using a dom walk, say): FOR_EACH_BB_FN (bb, fun) check_block (bb); so it could see the clobber after a later dominating assignment. Similarly check_call_dangling could see a use that is “protected” by a later assignment. Richard
Re: [PATCH] vect.exp: Rename .cc files in the folder to .C.
On Mon, Jan 17, 2022 at 12:10 PM Martin Liška wrote: > > Hi. > > I would like to unify filenames in testsuite and one of the biggest > offenders are vectorizer tests under g++.dg. > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? I think .cc is exactly correct now that the main sources are .cc, renaming to .C doesn't sound like a good idea. Richard. > Thanks, > Martin > > gcc/testsuite/ChangeLog: > > * g++.dg/vect/param-max-aliased-pr26197.cc: Moved to... > * g++.dg/vect/param-max-aliased-pr26197.C: ...here. > * g++.dg/vect/pr102226.cc: Moved to... > * g++.dg/vect/pr102226.C: ...here. > * g++.dg/vect/pr102421.cc: Moved to... > * g++.dg/vect/pr102421.C: ...here. > * g++.dg/vect/pr102572.cc: Moved to... > * g++.dg/vect/pr102572.C: ...here. > * g++.dg/vect/pr102696.cc: Moved to... > * g++.dg/vect/pr102696.C: ...here. > * g++.dg/vect/pr102788.cc: Moved to... > * g++.dg/vect/pr102788.C: ...here. > * g++.dg/vect/pr103494.cc: Moved to... > * g++.dg/vect/pr103494.C: ...here. > * g++.dg/vect/pr19951.cc: Moved to... > * g++.dg/vect/pr19951.C: ...here. > * g++.dg/vect/pr21218.cc: Moved to... > * g++.dg/vect/pr21218.C: ...here. > * g++.dg/vect/pr21734_1.cc: Moved to... > * g++.dg/vect/pr21734_1.C: ...here. > * g++.dg/vect/pr21734_2.cc: Moved to... > * g++.dg/vect/pr21734_2.C: ...here. > * g++.dg/vect/pr22543.cc: Moved to... > * g++.dg/vect/pr22543.C: ...here. > * g++.dg/vect/pr33426-ivdep-2.cc: Moved to... > * g++.dg/vect/pr33426-ivdep-2.C: ...here. > * g++.dg/vect/pr33426-ivdep-3.cc: Moved to... > * g++.dg/vect/pr33426-ivdep-3.C: ...here. > * g++.dg/vect/pr33426-ivdep-4.cc: Moved to... > * g++.dg/vect/pr33426-ivdep-4.C: ...here. > * g++.dg/vect/pr33426-ivdep.cc: Moved to... > * g++.dg/vect/pr33426-ivdep.C: ...here. > * g++.dg/vect/pr33834_1.cc: Moved to... > * g++.dg/vect/pr33834_1.C: ...here. > * g++.dg/vect/pr33834_2.cc: Moved to... > * g++.dg/vect/pr33834_2.C: ...here. > * g++.dg/vect/pr33835.cc: Moved to... 
> * g++.dg/vect/pr33835.C: ...here. > * g++.dg/vect/pr33860.cc: Moved to... > * g++.dg/vect/pr33860.C: ...here. > * g++.dg/vect/pr33860a.cc: Moved to... > * g++.dg/vect/pr33860a.C: ...here. > * g++.dg/vect/pr36648.cc: Moved to... > * g++.dg/vect/pr36648.C: ...here. > * g++.dg/vect/pr37174.cc: Moved to... > * g++.dg/vect/pr37174.C: ...here. > * g++.dg/vect/pr43771.cc: Moved to... > * g++.dg/vect/pr43771.C: ...here. > * g++.dg/vect/pr44861.cc: Moved to... > * g++.dg/vect/pr44861.C: ...here. > * g++.dg/vect/pr45470-a.cc: Moved to... > * g++.dg/vect/pr45470-a.C: ...here. > * g++.dg/vect/pr45470-b.cc: Moved to... > * g++.dg/vect/pr45470-b.C: ...here. > * g++.dg/vect/pr50698.cc: Moved to... > * g++.dg/vect/pr50698.C: ...here. > * g++.dg/vect/pr51485.cc: Moved to... > * g++.dg/vect/pr51485.C: ...here. > * g++.dg/vect/pr58513.cc: Moved to... > * g++.dg/vect/pr58513.C: ...here. > * g++.dg/vect/pr6.cc: Moved to... > * g++.dg/vect/pr6.C: ...here. > * g++.dg/vect/pr60023.cc: Moved to... > * g++.dg/vect/pr60023.C: ...here. > * g++.dg/vect/pr60559.cc: Moved to... > * g++.dg/vect/pr60559.C: ...here. > * g++.dg/vect/pr60729.cc: Moved to... > * g++.dg/vect/pr60729.C: ...here. > * g++.dg/vect/pr60836.cc: Moved to... > * g++.dg/vect/pr60836.C: ...here. > * g++.dg/vect/pr60896.cc: Moved to... > * g++.dg/vect/pr60896.C: ...here. > * g++.dg/vect/pr61171.cc: Moved to... > * g++.dg/vect/pr61171.C: ...here. > * g++.dg/vect/pr64410.cc: Moved to... > * g++.dg/vect/pr64410.C: ...here. > * g++.dg/vect/pr68145.cc: Moved to... > * g++.dg/vect/pr68145.C: ...here. > * g++.dg/vect/pr68762-1.cc: Moved to... > * g++.dg/vect/pr68762-1.C: ...here. > * g++.dg/vect/pr68762-2.cc: Moved to... > * g++.dg/vect/pr68762-2.C: ...here. > * g++.dg/vect/pr70726.cc: Moved to... > * g++.dg/vect/pr70726.C: ...here. > * g++.dg/vect/pr70729-nest.cc: Moved to... > * g++.dg/vect/pr70729-nest.C: ...here. > * g++.dg/vect/pr70729.cc: Moved to... > * g++.dg/vect/pr70729.C: ...here. > * g++.dg/vect/pr70944.cc: Moved to... 
> * g++.dg/vect/pr70944.C: ...here. > * g++.dg/vect/pr71483.c: Moved to... > * g++.dg/vect/pr71483.C: ...here. > * g++.dg/vect/pr84362.cc: Moved to... > * g++.dg/vect/pr84362.C: ...here. > * g++.dg/vect/pr84556.cc: Moved to... > *
Re: [PATCH] tree-optimization/103721 - Only add equivalencies that are still valid.
On Wed, Jan 19, 2022 at 2:37 AM Andrew MacLeod via Gcc-patches wrote: > > This patch happens to fix the PR, but I believe it only papers over a > deeper issue that is uncovered in PR104067. > > That said, examination of the issue uncovered an oversight in the way > equivalence sets are merged by the equivalence oracle. I have not seen > an instance via the ranger, but I suspect it's just a matter of time. > > Equivalence sets are added to the basic block in which they occur. By > default, the definition of an SSA_NAME creates an equivalence in the DEF > block containing just the name itself. Other equivalences are added as > they are encountered in their respective basic blocks, and are created > by combining whatever equivalence is active (via query) in that block > with the new equivalence. An equivalence introduced by an edge is > currently only added if the edge destination is a block with a single > predecessor. It is then added to that block. > > When a query is made for the equivalence set for b_2 at BBx, a walk up > the dominance tree is made looking for the first block which has an > equivalence containing b_2. This then becomes the equivalence set for > b_2 at BBx. > > If this set contains f_8, before we know that f_8 and b_2 are actually > equivalent, we query the equivalence set of f_8 at BBx. If it comes back > with the same set, then the 2 names are equivalent. If the set is > different, then they are not. > > This allows us to register equivalences as we see them without worrying > about invalidating other equivalences. Rather, we defer validation > until we actually care, and pay the cost at the query point. > > This PR has exposed a flaw in how we register equivalence sets around > back edges which could potentially show up somewhere. 
> > searchVolume_5 was used in previous blocks along the back edge and has an > equivalence set of {_5, world_7} in BB8 > ># searchVolume_11 = PHI <1(4), 0(3)> { _11 } ># currentVolume_8 = searchVolume_5{ _5, _8 , world_7 } > > ># searchVolume_5 = PHI { _5, _11 } ># currentVolume_6 = PHI > > When an equivalence is added for currentVolume_6, a query is made for > the equivalence set for currentVolume_8, which returns the set {_5, _8, > world_7 }. Currently, this is simply combined with {_6} via a bitwise > OR to produce {_5, _6, _8, world_7 }. This is incorrect as _5's > equivalence set is now {_5, _11}. > > _6 and _8 don't appear to be directly related to _5, so we were missing > it. What should be happening is when we merge the equivalence set for > currentVolume_6 and currentVolume_8, each member of the set should be > verified by the same criteria the query uses... i.e., ask for the equiv > set for _5, _8, and world_7 at BB9, and if it is different than this > set, it isn't added. > > This would then create the correct equivalence set { _6, _8, world_7 }, > as the query for _5 would come back with {_5, _11} and not evaluate as > equivalent. > > And yes, PHIs all happen in parallel... We don't need to worry about > ordering because even if the PHI hadn't been processed in this order, > the definition would have provided a default of { _5 }, and thus still > not been equivalent and still won't be added to the set. > > Anyway, even though I think there is an additional problem in this PR, I > wanted to get approval and check this code in under this PR since it > does need to be fixed, and was uncovered here. We won't close the PR > until we are sure the underlying issue is resolved. > > I will also see if I can come up with some kind of test case in the > meantime as well. > > Bootstraps on x86_64-pc-linux-gnu with no regressions. Overall compile > time is very nominal.. less than a 0.1% impact on the EVRP/VRP passes, > so the cost is minuscule. > > OK for trunk? OK. 
I don't quite understand how what you describe above works; it sounds a bit odd with respect to the idea that equivalences should be transitive. But I should note that forming equivalences from PHI nodes with backedges is not possible without being very careful, since you will easily end up equating _1 and _1 from different iterations (and thus with different values). Thanks, Richard. > > Andrew
Re: [PATCH] riscv: fix -Wformat-diag errors.
On 1/18/22 23:00, Joseph Myers wrote: On Tue, 18 Jan 2022, Martin Liška wrote: @@ -3309,8 +3309,8 @@ riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, && strcmp (string, "machine")) { warning (OPT_Wattributes, - "argument to %qE attribute is not \"user\", \"supervisor\", or \"machine\"", - name); + "argument to %qE attribute is not %<user%>, %<supervisor%>, " + "or %<machine%>", name); *no_add_attrs = true; My reading is that the attribute arguments here are string constants, not identifiers - that is, the ASCII double quotes are correct in the diagnostic output, because those double quotes are part of the literal text that's supposed to appear in the program. (Maybe %<\"user\"%> is the right way of marking it up to indicate that the double quotes are part of the literal program text, not English-level quoting.) Makes sense, I'm going to install the following patch. Martin From fdf31ae25f66cd486e655d7ebda36826025b9d0e Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Wed, 19 Jan 2022 10:32:13 +0100 Subject: [PATCH] Update on riscv -Wformat-diag string. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_handle_type_attribute): Update one -Wformat-diag string in warning message. --- gcc/config/riscv/riscv.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 8314864d5e7..6885b4bbad2 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3309,8 +3309,8 @@ riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, && strcmp (string, "machine")) { warning (OPT_Wattributes, - "argument to %qE attribute is not %<user%>, %<supervisor%>, " - "or %<machine%>", name); + "argument to %qE attribute is not %<\"user\"%>, %<\"supervisor\"%>, " + "or %<\"machine\"%>", name); *no_add_attrs = true; } } -- 2.34.1
Re: [PATCH] waccess: Look at calls when tracking clobbers [PR104092]
On Tue, Jan 18, 2022 at 2:40 PM Richard Sandiford via Gcc-patches wrote: > > In this PR the waccess pass was fed: > > D.10779 ={v} {CLOBBER}; > VIEW_CONVERT_EXPR(D.10779) = .MASK_LOAD_LANES (addr_5(D), > 64B, _2); > _7 = D.10779.__val[0]; > > However, the tracking of m_clobbers only looked at gassigns, > so it missed that the clobber on the first line was overwritten > by the call on the second line. Just as a note another possible def can come via asm() outputs and clobbers. There would have been walk_stmt_load_store_ops to track all those down (not sure if the function is a good fit here). > This patch splits the updating of m_clobbers out into its own > function, called after the check_*() routines, and extends it > to handle both gassigns and gcalls. I think that makes sense > as an instance of the "read, operate, write" model, with the > new function being part of "write". > > Previously only the gimple_clobber_p handling was conditional > on m_check_dangling_p, but I think the whole of the new function > can be. We only enter stmts into m_clobbers if m_check_dangling_p, > so we only need to remove them under the same condition. > > Tested on aarch64-linux-gnu. OK to install? > > Richard > > > gcc/ > PR middle-end/104092 > * gimple-ssa-warn-access.cc (pass_waccess::update_clobbers_from_lhs): > New function, split out from... > (pass_waccess::check_stmt): ...here and generalized to calls. > (pass_waccess::check_block): Call it. > > gcc/testsuite/ > * gcc.target/aarch64/sve/acle/general/pr104092.c: New test. 
> --- > gcc/gimple-ssa-warn-access.cc | 68 +++ > .../aarch64/sve/acle/general/pr104092.c | 7 ++ > 2 files changed, 48 insertions(+), 27 deletions(-) > create mode 100644 > gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr104092.c > > diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc > index f639807a78a..25066fa6b89 100644 > --- a/gcc/gimple-ssa-warn-access.cc > +++ b/gcc/gimple-ssa-warn-access.cc > @@ -2094,6 +2094,9 @@ private: >/* Check a non-call statement. */ >void check_stmt (gimple *); > > + /* Update the clobber map based on the lhs of a statement. */ > + void update_clobbers_from_lhs (gimple *); > + >/* Check statements in a basic block. */ >void check_block (basic_block); > > @@ -4270,33 +4273,6 @@ is_auto_decl (tree x) > void > pass_waccess::check_stmt (gimple *stmt) > { > - if (m_check_dangling_p && gimple_clobber_p (stmt)) > -{ > - /* Ignore clobber statemts in blocks with exceptional edges. */ > - basic_block bb = gimple_bb (stmt); > - edge e = EDGE_PRED (bb, 0); > - if (e->flags & EDGE_EH) > - return; > - > - tree var = gimple_assign_lhs (stmt); > - m_clobbers.put (var, stmt); > - return; > -} > - > - if (is_gimple_assign (stmt)) > -{ > - /* Clobbered unnamed temporaries such as compound literals can be > -revived. Check for an assignment to one and remove it from > -M_CLOBBERS. */ > - tree lhs = gimple_assign_lhs (stmt); > - while (handled_component_p (lhs)) > - lhs = TREE_OPERAND (lhs, 0); > - > - if (is_auto_decl (lhs)) > - m_clobbers.remove (lhs); > - return; > -} > - >if (greturn *ret = dyn_cast (stmt)) > { >if (optimize && flag_isolate_erroneous_paths_dereference) > @@ -4326,6 +4302,42 @@ pass_waccess::check_stmt (gimple *stmt) > } > } > > +/* Update the clobber map based on the lhs of STMT. */ > + > +void > +pass_waccess::update_clobbers_from_lhs (gimple *stmt) > +{ > + if (gimple_clobber_p (stmt)) > +{ > + /* Ignore clobber statements in blocks with exceptional edges. 
*/ > + basic_block bb = gimple_bb (stmt); > + edge e = EDGE_PRED (bb, 0); > + if (e->flags & EDGE_EH) > + return; > + > + tree var = gimple_assign_lhs (stmt); > + m_clobbers.put (var, stmt); > + return; > +} > + > + if (is_gimple_assign (stmt) || is_gimple_call (stmt)) > +{ > + /* Clobbered unnamed temporaries such as compound literals can be > +revived. Check for an assignment to one and remove it from > +M_CLOBBERS. */ > + tree lhs = gimple_get_lhs (stmt); > + if (!lhs) > + return; > + > + while (handled_component_p (lhs)) > + lhs = TREE_OPERAND (lhs, 0); > + > + if (is_auto_decl (lhs)) > + m_clobbers.remove (lhs); > + return; > +} > +} > + > /* Check basic block BB for invalid accesses. */ > > void > @@ -4340,6 +4352,8 @@ pass_waccess::check_block (basic_block bb) > check_call (call); >else > check_stmt (stmt); > + if (m_check_dangling_p) > + update_clobbers_from_lhs (stmt); > } > } > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr104092.c > b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr104092.c > new file mode 100644 >
Re: [PATCH] RISC-V: Update testcases info with new implement info
On 1/19/22 10:15, shi...@iscas.ac.cn wrote: |From: LiaoShihua After commit 591b6e00d1bfe12932ca31530d5859f95db8a35a " riscv: fix -Wformat-diag errors ", some strings in the implementation were changed. This patch updates the check info in the testcases to sync with it.| Thank you for the fix! Martin
[PATCH] RISC-V: Update testcases info with new implement info
From: LiaoShihua After commit 591b6e00d1bfe12932ca31530d5859f95db8a35a " riscv: fix -Wformat-diag errors ", some strings in the implementation were changed. This patch updates the check info in the testcases to sync with it. gcc/testsuite/ChangeLog: * gcc.target/riscv/arch-9.c: Update the check info. * gcc.target/riscv/arch-10.c: Ditto. * gcc.target/riscv/arch-12.c: Ditto. --- gcc/testsuite/gcc.target/riscv/arch-10.c | 2 +- gcc/testsuite/gcc.target/riscv/arch-12.c | 2 +- gcc/testsuite/gcc.target/riscv/arch-9.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/arch-10.c b/gcc/testsuite/gcc.target/riscv/arch-10.c index 47dbda333c9..1052f2e0c14 100644 --- a/gcc/testsuite/gcc.target/riscv/arch-10.c +++ b/gcc/testsuite/gcc.target/riscv/arch-10.c @@ -3,4 +3,4 @@ int foo() { } -/* { dg-error "Extension `f' appear more than one time." "" { target *-*-* } 0 } */ +/* { dg-error "extension 'f' appear more than one time" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/riscv/arch-12.c b/gcc/testsuite/gcc.target/riscv/arch-12.c index 29e16c30815..5ee9a1da5bb 100644 --- a/gcc/testsuite/gcc.target/riscv/arch-12.c +++ b/gcc/testsuite/gcc.target/riscv/arch-12.c @@ -1,4 +1,4 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=rv64im1p2p3 -mabi=lp64" } */ int foo() {} -/* { dg-error "'-march=rv64im1p2p3': For 'm1p2p\\?', version number with more than 2 level is not supported." "" { target *-*-* } 0 } */ +/* { dg-error "'-march=rv64im1p2p3': for 'm1p2p\\?', version number with more than 2 level is not supported" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/gcc.target/riscv/arch-9.c b/gcc/testsuite/gcc.target/riscv/arch-9.c index 74e64103563..d00e99d3534 100644 --- a/gcc/testsuite/gcc.target/riscv/arch-9.c +++ b/gcc/testsuite/gcc.target/riscv/arch-9.c @@ -3,4 +3,4 @@ int foo() { } -/* { dg-warning "version of `g` will be omitted, please specify version for individual extension." 
"" { target *-*-* } 0 } */ +/* { dg-warning "version of 'g' will be omitted, please specify version for individual extension" "" { target *-*-* } 0 } */ -- 2.31.1.windows.1
Re: [PATCH 2/2] [Ada] Set target_cpu to x32 for x86_64-linux-gnux32
OK, thanks. > Since the x86_64-linux-gnux32 compiler is actually an x32 compiler, set > target_cpu to x32 for x86_64-linux-gnux32. > > PR ada/103538 > * gcc-interface/Makefile.in (target_cpu): Set to x32 for > x86_64-linux-gnux32. > --- > gcc/ada/gcc-interface/Makefile.in | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/gcc/ada/gcc-interface/Makefile.in > b/gcc/ada/gcc-interface/Makefile.in > index 53d0739470a..b8a24708280 100644 > --- a/gcc/ada/gcc-interface/Makefile.in > +++ b/gcc/ada/gcc-interface/Makefile.in > @@ -350,6 +350,13 @@ ifeq ($(strip $(filter-out x86_64, $(target_cpu))),) >endif > endif > > +# The x86_64-linux-gnux32 compiler is actually an x32 compiler > +ifeq ($(strip $(filter-out x86_64 linux-gnux32%, $(target_cpu) > $(target_os))),) > + ifneq ($(strip $(MULTISUBDIR)),/64) > +target_cpu:=x32 > + endif > +endif > + > # The SuSE PowerPC64/Linux compiler is actually a 32-bit PowerPC compiler > ifeq ($(strip $(filter-out powerpc64 suse linux%, $(target_cpu) > $(target_vendor) $(target_os))),) >target_cpu:=powerpc > -- > 2.34.1 >
Re: [PATCH 1/2] [Ada] Compile s-mmap and 128bit on x86_64-linux-gnux32
OK, thanks. > PR ada/103538 > * Makefile.rtl (LIBGNAT_TARGET_PAIRS): Add > $(TRASYM_DWARF_UNIX_PAIRS), > s-tsmona.adb $(GNATRTL_128BIT_PAIRS). > (EXTRA_GNATRTL_NONTASKING_OBJS): Add $(TRASYM_DWARF_UNIX_OBJS) > and $(GNATRTL_128BIT_OBJS). > --- > gcc/ada/Makefile.rtl | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/gcc/ada/Makefile.rtl b/gcc/ada/Makefile.rtl > index 1b066ad6b14..6d60aea75a8 100644 > --- a/gcc/ada/Makefile.rtl > +++ b/gcc/ada/Makefile.rtl > @@ -2650,13 +2650,18 @@ ifeq ($(strip $(filter-out %x32 linux%,$(target_cpu) > $(target_os))),) >s-tasinf.adbs-tpopsp.adbs-taspri.ads + $(TRASYM_DWARF_UNIX_PAIRS) \ > + s-tsmona.adb$(ATOMICS_TARGET_PAIRS) \ >$(X86_64_TARGET_PAIRS) \ > + $(GNATRTL_128BIT_PAIRS) \ >system.ads >TOOLS_TARGET_PAIRS = indepsw.adb >EXTRA_GNATRTL_NONTASKING_OBJS=g-sse.o g-ssvety.o > + EXTRA_GNATRTL_NONTASKING_OBJS+=$(TRASYM_DWARF_UNIX_OBJS) > + EXTRA_GNATRTL_NONTASKING_OBJS+=$(GNATRTL_128BIT_OBJS) >EXTRA_GNATRTL_TASKING_OBJS=s-linux.o a-exetim.o >EH_MECHANISM=-gcc >THREADSLIB=-lpthread -lrt > -- > 2.34.1 >
Re: nvptx: update fix for -Wformat-diag (was: [PATCH] nvptx: fix -Wformat-diag warnings)
On 1/19/22 08:31, Thomas Schwinge wrote: |Thanks. I additionally fixed up 'num_workers' in test cases, and then pushed to master branch commit 2aea19bdb12308aac1519ffc5ecc9d1ec24f9cc5 "nvptx: update fix for -Wformat-diag", see attached.| I thank you for testing and adjustment of the patch. Cheers, Martin
[PATCH] libstdc++: Fix for non-constexpr math_errhandling
Follow-up to my last patch. This one is a more thorough fix. Tested on x86_64- linux. OK for trunk? 8< Use SFINAE magic to support: "It is unspecified whether math_errhandling is a macro or an identifier with external linkage." [C Standard] Signed-off-by: Matthias Kretz libstdc++-v3/ChangeLog: * include/experimental/bits/simd.h (__floating_point_flags): Do not rely on math_errhandling to expand to a constant expression. --- libstdc++-v3/include/experimental/bits/simd.h | 26 ++- 1 file changed, 20 insertions(+), 6 deletions(-) -- ── Dr. Matthias Kretz https://mattkretz.github.io GSI Helmholtz Centre for Heavy Ion Research https://gsi.de stdₓ::simd ──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h index c991e3f223e..82e9841195e 100644 --- a/libstdc++-v3/include/experimental/bits/simd.h +++ b/libstdc++-v3/include/experimental/bits/simd.h @@ -283,20 +283,34 @@ constexpr inline bool __have_power_vmx = __have_power_vsx; namespace __detail { - constexpr bool __handle_fpexcept = #ifdef math_errhandling -math_errhandling & MATH_ERREXCEPT; -#elif defined __FAST_MATH__ -false; + // Determines _S_handle_fpexcept from math_errhandling if it is defined and expands to a constant + // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value + // must be guessed. + template +constexpr bool __handle_fpexcept_impl(int) +{ return math_errhandling & MATH_ERREXCEPT; } +#endif + + // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are + // ignored, otherwise implement correct exception behavior. + constexpr bool __handle_fpexcept_impl(float) + { +#if defined __FAST_MATH__ +return false; #else -true; +return true; #endif + } + + /// True if math functions must raise floating-point exceptions as specified by C17. 
+ static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0); constexpr std::uint_least64_t __floating_point_flags() { std::uint_least64_t __flags = 0; -if constexpr (__handle_fpexcept) +if constexpr (_S_handle_fpexcept) __flags |= 1; #ifdef __FAST_MATH__ __flags |= 1 << 1;
Re: [PATCH] git-backport: support renamed .cc files in commit message.
On 1/18/22 20:10, Harald Anlauf via Fortran wrote: Am 17.01.22 um 22:26 schrieb Martin Liška: On 1/12/22 16:54, Martin Liška wrote: There's a patch that enhances git-backport so that it updates commit messages for files whose names now end with .cc but are still .c on a branch. The patch has been installed as I've made the renaming now. Cheers, Martin I just made a "git rebase" and had to manually fix the filenames in the commit message. Otherwise gcc-verify would complain. Sure, and I'm adding a new script that basically follows all the 'did you mean' suggestions in gcc-verify and fixes them: ERR: unchanged file mentioned in a ChangeLog (did you mean "contrib/gcc-git-customization.sh"?): "contrib/gcc2-git-customization.sh" ... Would it make sense to have something that is clever enough for rebase to do similar things as git-backport? Yes, a new git alias 'gcc-fix-changelog' is going to be available with the patch. Martin Thanks, Harald From 4f502745c8e2562ae192181bf2585bad42414d45 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Wed, 19 Jan 2022 07:57:05 +0100 Subject: [PATCH] Come up with git-fix-changelog.py script. contrib/ChangeLog: * git-backport.py: Use it. * git-fix-changelog.py: New file. * gcc-git-customization.sh: Add new alias git gcc-fix-changelog. 
--- contrib/gcc-git-customization.sh | 1 + contrib/git-backport.py | 47 +--- contrib/git-fix-changelog.py | 92 3 files changed, 95 insertions(+), 45 deletions(-) create mode 100755 contrib/git-fix-changelog.py diff --git a/contrib/gcc-git-customization.sh b/contrib/gcc-git-customization.sh index aca61b781ff..2eec17937af 100755 --- a/contrib/gcc-git-customization.sh +++ b/contrib/gcc-git-customization.sh @@ -27,6 +27,7 @@ git config alias.gcc-undescr \!"f() { o=\$(git config --get gcc-config.upstream) git config alias.gcc-verify '!f() { "`git rev-parse --show-toplevel`/contrib/gcc-changelog/git_check_commit.py" $@; } ; f' git config alias.gcc-backport '!f() { "`git rev-parse --show-toplevel`/contrib/git-backport.py" $@; } ; f' +git config alias.gcc-fix-changelog '!f() { "`git rev-parse --show-toplevel`/contrib/git-fix-changelog.py" $@; } ; f' git config alias.gcc-mklog '!f() { "`git rev-parse --show-toplevel`/contrib/mklog.py" $@; } ; f' git config alias.gcc-commit-mklog '!f() { "`git rev-parse --show-toplevel`/contrib/git-commit-mklog.py" "$@"; }; f' diff --git a/contrib/git-backport.py b/contrib/git-backport.py index 83189a2b5c7..fc369d97754 100755 --- a/contrib/git-backport.py +++ b/contrib/git-backport.py @@ -22,29 +22,9 @@ import argparse import os import subprocess -import tempfile script_folder = os.path.dirname(os.path.abspath(__file__)) -verify_script = os.path.join(script_folder, - 'gcc-changelog/git_check_commit.py') - - -def replace_file_in_changelog(lines, filename): -if not filename.endswith('.cc'): -return - -# consider all componenets of a path: gcc/ipa-icf.cc -while filename: -for i, line in enumerate(lines): -if filename in line: -line = line.replace(filename, filename[:-1]) -lines[i] = line -return -parts = filename.split('/') -if len(parts) == 1: -return -filename = '/'.join(parts[1:]) - +fixup_script = os.path.join(script_folder, 'git-fix-changelog.py') if __name__ == '__main__': parser = argparse.ArgumentParser(description='Backport a git 
revision.') @@ -52,27 +32,4 @@ if __name__ == '__main__': args = parser.parse_args() subprocess.run('git cherry-pick -x %s' % args.revision, shell=True) - -# Update commit message if change for a .cc file was taken -r = subprocess.run(f'{verify_script} HEAD', shell=True, encoding='utf8', - stdout=subprocess.PIPE, stderr=subprocess.PIPE) -if r.returncode != 0: -lines = r.stdout.splitlines() -cmd = 'git show -s --format=%B' -commit_message = subprocess.check_output(cmd, shell=True, - encoding='utf8').strip() -commit_message = commit_message.splitlines() - -todo = [line for line in lines if 'unchanged file mentioned' in line] -for item in todo: -filename = item.split()[-1].strip('"') -replace_file_in_changelog(commit_message, filename) - -with tempfile.NamedTemporaryFile('w', encoding='utf8', - delete=False) as w: -w.write('\n'.join(commit_message)) -w.close() -subprocess.check_output(f'git commit --amend -F {w.name}', -shell=True, encoding='utf8') -os.unlink(w.name) -print(f'Commit message updated: {len(todo)} .cc file(s) changed.') +subprocess.run(fixup_script, shell=True) diff --git a/contrib/git-fix-changelog.py