[C PATCH 1/6 v2] c: reorganize recursive type checking
Thanks Joseph, below is a revised version of this patch with slight additional changes to the comment of tagged_types_tu_compatible_p. ok for trunk? Martin Am Mittwoch, dem 06.09.2023 um 20:59 + schrieb Joseph Myers: > On Sat, 26 Aug 2023, Martin Uecker via Gcc-patches wrote: > > > -static int > > +static bool > > comp_target_types (location_t location, tree ttl, tree ttr) > > The comment above this function should be updated to refer to returning > true, not to returning 1. And other comments on common_pointer_type and > inside that function should be updated to refer to comp_target_types > returning true, not nonzero. > > > @@ -1395,17 +1382,13 @@ free_all_tagged_tu_seen_up_to (const struct > > tagged_tu_seen_cache *tu_til) > > > > /* Return 1 if two 'struct', 'union', or 'enum' types T1 and T2 are > > compatible. If the two types are not the same (which has been > > - checked earlier), this can only happen when multiple translation > > - units are being compiled. See C99 6.2.7 paragraph 1 for the exact > > - rules. ENUM_AND_INT_P and DIFFERENT_TYPES_P are as in > > - comptypes_internal. */ > > + checked earlier). */ > > > > -static int > > +static bool > > tagged_types_tu_compatible_p (const_tree t1, const_tree t2, > > - bool *enum_and_int_p, bool *different_types_p) > > + struct comptypes_data* data) > > Similarly, this comment should be updated for the new return type. Also > the GNU style is "struct comptypes_data *data" with space before not after > '*'. > > > @@ -1631,9 +1603,9 @@ tagged_types_tu_compatible_p (const_tree t1, > > const_tree t2, > > Otherwise, the argument types must match. > > ENUM_AND_INT_P and DIFFERENT_TYPES_P are as in comptypes_internal. */ > > > > -static int > > +static bool > > function_types_compatible_p (const_tree f1, const_tree f2, > > -bool *enum_and_int_p, bool *different_types_p) > > +struct comptypes_data *data) > > Another comment to update for a changed return type. 
> > > /* Check two lists of types for compatibility, returning 0 for > > - incompatible, 1 for compatible, or 2 for compatible with > > - warning. ENUM_AND_INT_P and DIFFERENT_TYPES_P are as in > > - comptypes_internal. */ > > + incompatible, 1 for compatible. ENUM_AND_INT_P and > > + DIFFERENT_TYPES_P are as in comptypes_internal. */ > > > > -static int > > +static bool > > type_lists_compatible_p (const_tree args1, const_tree args2, > > -bool *enum_and_int_p, bool *different_types_p) > > +struct comptypes_data *data) > > This one also needs updating to remove references to parameters that no > longer exist. > c: reorganize recursive type checking Reorganize recursive type checking to use a structure to store information collected during the recursion and returned to the caller (warning_needed, enum_and_int_p, different_types_p). gcc/c: * c-typeck.cc (struct comptypes_data): Add structure. (tagged_types_tu_compatible_p, function_types_compatible_p, type_lists_compatible_p, comptypes_internal): Add structure to interface, change return type to bool, and adapt calls. (comp_target_types): Change return type to bool. (comptypes, comptypes_check_enum_int, comptypes_check_different_types): Adapt calls. 
--- gcc/c/c-typeck.cc | 282 -- 1 file changed, 121 insertions(+), 161 deletions(-) diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index e2bfd2caf85..e55e887da14 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -90,12 +90,14 @@ static bool require_constant_elements; static bool require_constexpr_value; static tree qualify_type (tree, tree); -static int tagged_types_tu_compatible_p (const_tree, const_tree, bool *, -bool *); -static int comp_target_types (location_t, tree, tree); -static int function_types_compatible_p (const_tree, const_tree, bool *, - bool *); -static int type_lists_compatible_p (const_tree, const_tree, bool *, bool *); +struct comptypes_data; +static bool tagged_types_tu_compatible_p (const_tree, const_tree, + struct comptypes_data *); +static bool comp_target_types (location_t, tree, tree); +static bool function_types_compatible_p (const_tree, const_tree, +struct comptypes_data *); +static bool type_lists_compatible_p (const_tree, const_tree, +struct comptypes_data *); static tree lookup_field (tree, tree); static int convert_arguments (location_t, vec, tree, vec *, vec *, tree, @@ -125,7 +127,8 @@ static tree find_init_member (tree, struct obstack *); static void readonly_warning (tree, enum lvalue_use); static int lval
Re: [PATCH] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm
On 9/9/23 21:55, Juzhe-Zhong wrote: If a const vector all elements are same, the slide up is unnecessary. gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid unnecessary slideup. --- gcc/config/riscv/riscv-v.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index bee60de1d26..7ef884907b8 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2697,7 +2697,7 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d) rtx mask = force_reg (mask_mode, builder.build ()); rtx merge = d->op1; - if (need_slideup_p) + if (need_slideup_p && !const_vec_duplicate_p (d->op1)) { int slideup_cnt = vlen - (d->perm[vlen - 1].to_constant () % vlen) - 1; rtx ops[] = {d->target, d->op1, gen_int_mode (slideup_cnt, Pmode)}; Would it be better to adjust how we compute need_slideup_p to check !const_vec_duplicate_p (d->op1) instead of doing it here? That way the name "need_slideup_p" stays consistent with the intent of the code. It would also mean we wouldn't need to duplicate the additional check if we wanted to model the use of slideup in the cost calculations. Jeff
Re: [PATCH] RISC-V: Expand fixed-vlmax/vls vector permutation in targethook
On 9/9/23 20:33, Juzhe-Zhong wrote: When debugging FAIL: gcc.dg/pr92301.c execution test. Realize a vls vector permutation situation failed to vectorize since early return false: - /* For constant size indices, we dont't need to handle it here. - Just leave it to vec_perm. */ - if (d->perm.length ().is_constant ()) -return false; To avoid more potential failed vectorization case. Now expand it in targethook. gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_generic_patterns): Expand fixed-vlmax/vls vector permutation. OK. jeff
[PATCH] [11/12/13/14 Regression] ABI break in _Hash_node_value_base since GCC 11 [PR 111050]
Following confirmation of the fix by TC here is the patch where I'm simply adding a 'constexpr' on _M_next(). Please let me know this ChangeLog entry is correct. I would prefer this patch to be assigned to 'TC' with me as co-author but I don't know how to do such a thing. Unless I need to change my user git identity to do so ? libstdc++: Add constexpr qualification to _Hash_node::_M_next() https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1b6f0476837205932613ddb2b3429a55c26c409d changed _Hash_node_value_base to no longer derive from _Hash_node_base, which means that its member functions expect _M_storage to be at a different offset. So explosions result if an out-of-line definition is emitted for any of the member functions (say, in a non-optimized build) and the resulting object file is then linked with code built using older version of GCC/libstdc++. libstdc++-v3/ChangeLog: * include/bits/hashtable_policy.h (_Hash_node_value_base<>::_M_valptr(), _Hash_node_value_base<>::_M_v()) Add [[__gnu__::__always_inline__]]. (_Hash_node<>::_M_next()): Add constexpr. Co-authored-by: TC Ok to commit and backport to GCC 11, 12, 13 branches ? 
François diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index 347d468ea86..101c5eb639c 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -327,18 +327,22 @@ namespace __detail __gnu_cxx::__aligned_buffer<_Value> _M_storage; + [[__gnu__::__always_inline__]] _Value* _M_valptr() noexcept { return _M_storage._M_ptr(); } + [[__gnu__::__always_inline__]] const _Value* _M_valptr() const noexcept { return _M_storage._M_ptr(); } + [[__gnu__::__always_inline__]] _Value& _M_v() noexcept { return *_M_valptr(); } + [[__gnu__::__always_inline__]] const _Value& _M_v() const noexcept { return *_M_valptr(); } @@ -372,7 +376,7 @@ namespace __detail : _Hash_node_base , _Hash_node_value<_Value, _Cache_hash_code> { - _Hash_node* + constexpr _Hash_node* _M_next() const noexcept { return static_cast<_Hash_node*>(this->_M_nxt); } };
[PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm
gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid unnecessary slideup. --- gcc/config/riscv/riscv-v.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index bee60de1d26..3cd1f61de0e 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2647,7 +2647,8 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d) For index = { 0, 2, 5, 6}, we need to slide op1 up before we apply compress approach. */ - bool need_slideup_p = maybe_ne (d->perm[vlen - 1], 2 * vec_len - 1); + bool need_slideup_p = maybe_ne (d->perm[vlen - 1], 2 * vec_len - 1) + && !const_vec_duplicate_p (d->op1); /* If we leave it directly be handled by general gather, the code sequence will be: -- 2.36.3
Re: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm
Address comment: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm (gnu.org) juzhe.zh...@rivai.ai From: Juzhe-Zhong Date: 2023-09-10 22:07 To: gcc-patches CC: kito.cheng; kito.cheng; jeffreyalaw; rdapp.gcc; Juzhe-Zhong Subject: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid unnecessary slideup. --- gcc/config/riscv/riscv-v.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index bee60de1d26..3cd1f61de0e 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2647,7 +2647,8 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d) For index = { 0, 2, 5, 6}, we need to slide op1 up before we apply compress approach. */ - bool need_slideup_p = maybe_ne (d->perm[vlen - 1], 2 * vec_len - 1); + bool need_slideup_p = maybe_ne (d->perm[vlen - 1], 2 * vec_len - 1) + && !const_vec_duplicate_p (d->op1); /* If we leave it directly be handled by general gather, the code sequence will be: -- 2.36.3
[pushed] Darwin: Partial reversion of r14-3648 (Inits Section).
Tested on x86_64-darwin21 and i686-darwin9 with both dwarfutils and llvm-based dsymutil implementations. Pushed to trunk, thanks Iain --- 8< --- Although the Darwin ABI places both hot and cold partitions in the same section (the linker can partition by name), this does not work with the current dwarf2out implementation. Since we do see global initialization code getting hot/cold splits, this patch places the cold parts into text_cold, and keeps the hot part in the correct Init section per ABI. TODO: figure out a way to allow us to match the ABI fully. gcc/ChangeLog: * config/darwin.cc (darwin_function_section): Place unlikely executed global init code into the standard cold section. Signed-off-by: Iain Sandoe --- gcc/config/darwin.cc | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc index 95d6194cf22..154a2b2755a 100644 --- a/gcc/config/darwin.cc +++ b/gcc/config/darwin.cc @@ -3893,19 +3893,22 @@ darwin_function_section (tree decl, enum node_frequency freq, if (decl && DECL_SECTION_NAME (decl) != NULL) return get_named_section (decl, NULL, 0); + /* We always put unlikely executed stuff in the cold section; we have to put + this ahead of the global init section, since partitioning within a section + breaks some assumptions made in the DWARF handling. */ + if (freq == NODE_FREQUENCY_UNLIKELY_EXECUTED) +return (use_coal) ? darwin_sections[text_cold_coal_section] + : darwin_sections[text_cold_section]; + /* Intercept functions in global init; these are placed in separate sections. - FIXME: there should be some neater way to do this. */ + FIXME: there should be some neater way to do this, FIXME we should be able + to partition within a section. 
*/ if (DECL_NAME (decl) && (startswith (IDENTIFIER_POINTER (DECL_NAME (decl)), "_GLOBAL__sub_I") || startswith (IDENTIFIER_POINTER (DECL_NAME (decl)), "__static_initialization_and_destruction"))) return darwin_sections[static_init_section]; - /* We always put unlikely executed stuff in the cold section. */ - if (freq == NODE_FREQUENCY_UNLIKELY_EXECUTED) -return (use_coal) ? darwin_sections[text_cold_coal_section] - : darwin_sections[text_cold_section]; - /* If we have LTO *and* feedback information, then let LTO handle the function ordering, it makes a better job (for normal, hot, startup and exit - hence the bailout for cold above). */ -- 2.39.2 (Apple Git-143)
Re: Re: [PATCH] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm
Address comment: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm (gnu.org) juzhe.zh...@rivai.ai From: Jeff Law Date: 2023-09-10 21:34 To: Juzhe-Zhong; gcc-patches CC: kito.cheng; kito.cheng; rdapp.gcc Subject: Re: [PATCH] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm On 9/9/23 21:55, Juzhe-Zhong wrote: > If a const vector all elements are same, the slide up is unnecessary. > > gcc/ChangeLog: > > * config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid unnecessary > slideup. > > --- > gcc/config/riscv/riscv-v.cc | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index bee60de1d26..7ef884907b8 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -2697,7 +2697,7 @@ shuffle_compress_patterns (struct expand_vec_perm_d *d) > rtx mask = force_reg (mask_mode, builder.build ()); > > rtx merge = d->op1; > - if (need_slideup_p) > + if (need_slideup_p && !const_vec_duplicate_p (d->op1)) > { > int slideup_cnt = vlen - (d->perm[vlen - 1].to_constant () % vlen) - > 1; > rtx ops[] = {d->target, d->op1, gen_int_mode (slideup_cnt, Pmode)}; Would it be better to adjust how we compute need_slidup_p to check !const_vec_duplicate_p (d->op1) instead of doing it here? That way the name "need_slideup_p" stays consistent with the intent of the code. It would also mean we wouldn't need to duplicate the additional check if we wanted to model the use of slideup in the cost calculations. Jeff
Re: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm
On 9/10/23 08:07, Juzhe-Zhong wrote: gcc/ChangeLog: * config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid unnecessary slideup. OK jeff
Re: [PATCH] [11/12/13/14 Regression] ABI break in _Hash_node_value_base since GCC 11 [PR 111050]
François Dumont via Gcc-patches writes: > Following confirmation of the fix by TC here is the patch where I'm > simply adding a 'constexpr' on _M_next(). > > Please let me know this ChangeLog entry is correct. I would prefer > this patch to be assigned to 'TC' with me as co-author but I don't > know how to do such a thing. Unless I need to change my user git > identity to do so ? git commit --author="TC " --amend > > libstdc++: Add constexpr qualification to _Hash_node::_M_next() > > https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1b6f0476837205932613ddb2b3429a55c26c409d > changed _Hash_node_value_base to no longer derive from > _Hash_node_base, which means > that its member functions expect _M_storage to be at a different > offset. So explosions > result if an out-of-line definition is emitted for any of the > member functions (say, > in a non-optimized build) and the resulting object file is then > linked with code built > using older version of GCC/libstdc++. > > libstdc++-v3/ChangeLog: > > * include/bits/hashtable_policy.h > (_Hash_node_value_base<>::_M_valptr(), > _Hash_node_value_base<>::_M_v()) > Add [[__gnu__::__always_inline__]]. > (_Hash_node<>::_M_next()): Add constexpr. > > Co-authored-by: TC > > Ok to commit and backport to GCC 11, 12, 13 branches ? > > François > > [2. text/x-patch; pr111050.patch]...
Re: [PATCH] RISC-V Add Types to Un-Typed Thead Instructions:
On 8/31/23 11:36, Edwin Lu wrote: Related Discussion: https://inbox.sourceware.org/gcc-patches/12fb5088-3f28-0a69-de1e-f387371a5...@gmail.com/ This patch updates the THEAD instructions to ensure that no insn is left without a type attribute. Tested for regressions using rv32/64 multilib for linux/newlib. gcc/Changelog: * config/riscv/thead.md: Update types OK. The first could arguably be "multi", but both instructions it generates appear to be move/conversions, so "fmove" is reasonable as well. Ok for the trunk. And I think that should allow us to turn on the assertion, right? jeff
Re: [PATCH] Fix PR 111331: wrong code for `a > 28 ? MIN : 29`
On 9/8/23 06:39, Andrew Pinski via Gcc-patches wrote: The problem here is after r6-7425-ga9fee7cdc3c62d0e51730, the comparison to see if the transformation could be done was using the wrong value. Instead of see if the inner was LE (for MIN and GE for MAX) the outer value, it was comparing the inner to the value used in the comparison which was wrong. The match pattern copied the same logic mistake when they were added in r14-1411-g17cca3c43e2f49 . OK? Bootstrapped and tested on x86_64-linux-gnu. gcc/ChangeLog: PR tree-optimization/111331 * match.pd (`(a CMP CST1) ? max : a`): Fix the LE/GE comparison to the correct value. * tree-ssa-phiopt.cc (minmax_replacement): Fix the LE/GE comparison for the `(a CMP CST1) ? max : a` optimization. gcc/testsuite/ChangeLog: PR tree-optimization/111331 * gcc.c-torture/execute/pr111331-1.c: New test. * gcc.c-torture/execute/pr111331-2.c: New test. * gcc.c-torture/execute/pr111331-3.c: New test. OK jeff
PING^4: [PATCH] rtl-optimization/110939 Really fix narrow comparison of memory and constant
Ping. > > > On Thu, Aug 10, 2023 at 03:04:03PM +0200, Stefan Schulze Frielinghaus > > > wrote: > > > > In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I > > > > completely missed the fact that the normal form of a generated constant > > > > for a > > > > mode with fewer bits than in HOST_WIDE_INT is a sign extended version > > > > of the > > > > actual constant. This even holds true for unsigned constants. > > > > > > > > Fixed by masking out the upper bits for the incoming constant and sign > > > > extending the resulting unsigned constant. > > > > > > > > Bootstrapped and regtested on x64 and s390x. Ok for mainline? > > > > > > > > While reading existing optimizations in combine I stumbled across two > > > > optimizations where either my intuition about the representation of > > > > unsigned integers via a const_int rtx is wrong, which then in turn would > > > > probably also mean that this patch is wrong, or that the optimizations > > > > are missed sometimes. In other words in the following I would assume > > > > that the upper bits are masked out: > > > > > > > > diff --git a/gcc/combine.cc b/gcc/combine.cc > > > > index 468b7fde911..80c4ff0fbaf 100644 > > > > --- a/gcc/combine.cc > > > > +++ b/gcc/combine.cc > > > > @@ -11923,7 +11923,7 @@ simplify_compare_const (enum rtx_code code, > > > > machine_mode mode, > > > > /* (unsigned) < 0x8000 is equivalent to >= 0. */ > > > > else if (is_a (mode, &int_mode) > > > > && GET_MODE_PRECISION (int_mode) - 1 < > > > > HOST_BITS_PER_WIDE_INT > > > > - && ((unsigned HOST_WIDE_INT) const_op > > > > + && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK > > > > (int_mode)) > > > > == HOST_WIDE_INT_1U << (GET_MODE_PRECISION > > > > (int_mode) - 1))) > > > > { > > > > const_op = 0; > > > > @@ -11962,7 +11962,7 @@ simplify_compare_const (enum rtx_code code, > > > > machine_mode mode, > > > > /* (unsigned) >= 0x8000 is equivalent to < 0. 
*/ > > > > else if (is_a (mode, &int_mode) > > > > && GET_MODE_PRECISION (int_mode) - 1 < > > > > HOST_BITS_PER_WIDE_INT > > > > - && ((unsigned HOST_WIDE_INT) const_op > > > > + && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK > > > > (int_mode)) > > > > == HOST_WIDE_INT_1U << (GET_MODE_PRECISION > > > > (int_mode) - 1))) > > > > { > > > > const_op = 0; > > > > > > > > For example, while bootstrapping on x64 the optimization is missed since > > > > a LTU comparison in QImode is done and the constant equals > > > > 0xff80. > > > > > > > > Sorry for inlining another patch, but I would really like to make sure > > > > that my understanding is correct, now, before I come up with another > > > > patch. Thus it would be great if someone could shed some light on this. > > > > > > > > gcc/ChangeLog: > > > > > > > > * combine.cc (simplify_compare_const): Properly handle unsigned > > > > constants while narrowing comparison of memory and constants. > > > > --- > > > > gcc/combine.cc | 19 ++- > > > > 1 file changed, 10 insertions(+), 9 deletions(-) > > > > > > > > diff --git a/gcc/combine.cc b/gcc/combine.cc > > > > index e46d202d0a7..468b7fde911 100644 > > > > --- a/gcc/combine.cc > > > > +++ b/gcc/combine.cc > > > > @@ -12003,14 +12003,15 @@ simplify_compare_const (enum rtx_code code, > > > > machine_mode mode, > > > > && !MEM_VOLATILE_P (op0) > > > > /* The optimization makes only sense for constants which are big > > > > enough > > > > so that we have a chance to chop off something at all. */ > > > > - && (unsigned HOST_WIDE_INT) const_op > 0xff > > > > - /* Bail out, if the constant does not fit into INT_MODE. */ > > > > - && (unsigned HOST_WIDE_INT) const_op > > > > - < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << > > > > 1) - 1) > > > > + && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK > > > > (int_mode)) > 0xff > > > > /* Ensure that we do not overflow during normalization. 
*/ > > > > - && (code != GTU || (unsigned HOST_WIDE_INT) const_op < > > > > HOST_WIDE_INT_M1U)) > > > > + && (code != GTU > > > > + || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK > > > > (int_mode)) > > > > + < HOST_WIDE_INT_M1U) > > > > + && trunc_int_for_mode (const_op, int_mode) == const_op) > > > > { > > > > - unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op; > > > > + unsigned HOST_WIDE_INT n > > > > + = (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode); > > > > enum rtx_code adjusted_code; > > > > > > > > /* Normalize code to either LEU or GEU. */ > > > > @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, > > > > machine_mode mode, > > > > HOST_WIDE
[PATCH v2] swap: Fix incorrect lane extraction by vec_extract() [PR106770]
swap: Fix incorrect lane extraction by vec_extract() [PR106770] In the routine rs6000_analyze_swaps(), special handling of swappable instructions is done even if the webs that contain the swappable instructions are not optimized, i.e., the webs do not contain any permuting load/store instructions along with the associated register swap instructions. Doing special handling in such webs will result in the extracted lane being adjusted unnecessarily for vec_extract. Another issue is that existing code treats non-permuting loads/stores as special swappables. Non-permuting loads/stores (that have not yet been split into a permuting load/store and a swap) are handled by converting them into a permuting load/store (which effectively removes the swap). As a result, if special swappables are handled only in webs containing permuting loads/stores, then non-optimal code is generated for non-permuting loads/stores. Hence, in this patch, all webs containing either permuting loads/ stores or non-permuting loads/stores are marked as requiring special handling of swappables. Swaps associated with permuting loads/stores are marked for removal, and non-permuting loads/stores are converted to permuting loads/stores. Then the special swappables in the webs are fixed up. Another issue with always handling swappable instructions is that it is incorrect to do so in webs where loads/stores on quad word aligned addresses are changed to lvx/stvx. Similarly, in webs where swap(load(vector constant)) instructions are replaced with load(swapped vector constant), the swappable instructions should not be modified. 2023-09-10 Surya Kumari Jangala gcc/ PR rtl-optimization/PR106770 * config/rs6000/rs6000-p8swap.cc (non_permuting_mem_insn): New function. (handle_non_permuting_mem_insn): New function. (rs6000_analyze_swaps): Handle swappable instructions only in certain webs. (web_requires_special_handling): New instance variable. 
(handle_special_swappables): Remove handling of non-permuting load/store instructions. gcc/testsuite/ PR rtl-optimization/PR106770 * gcc.target/powerpc/pr106770.c: New test. --- diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc index 0388b9bd736..3a695aa1318 100644 --- a/gcc/config/rs6000/rs6000-p8swap.cc +++ b/gcc/config/rs6000/rs6000-p8swap.cc @@ -179,6 +179,13 @@ class swap_web_entry : public web_entry_base unsigned int special_handling : 4; /* Set if the web represented by this entry cannot be optimized. */ unsigned int web_not_optimizable : 1; + /* Set if the swappable insns in the web represented by this entry + have to be fixed. Swappable insns have to be fixed in : + - webs containing permuting loads/stores and the swap insns +in such webs have been marked for removal + - webs where non-permuting loads/stores have been converted +to permuting loads/stores */ + unsigned int web_requires_special_handling : 1; /* Set if this insn should be deleted. */ unsigned int will_delete : 1; }; @@ -1468,14 +1475,6 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i) if (dump_file) fprintf (dump_file, "Adjusting subreg in insn %d\n", i); break; -case SH_NOSWAP_LD: - /* Convert a non-permuting load to a permuting one. */ - permute_load (insn); - break; -case SH_NOSWAP_ST: - /* Convert a non-permuting store to a permuting one. */ - permute_store (insn); - break; case SH_EXTRACT: /* Change the lane on an extract operation. */ adjust_extract (insn); @@ -2401,6 +2400,25 @@ recombine_lvx_stvx_patterns (function *fun) free (to_delete); } +/* Return true if insn is a non-permuting load/store. */ +static bool +non_permuting_mem_insn (swap_web_entry *insn_entry, unsigned int i) +{ + return (insn_entry[i].special_handling == SH_NOSWAP_LD || + insn_entry[i].special_handling == SH_NOSWAP_ST); +} + +/* Convert a non-permuting load/store insn to a permuting one. 
*/ +static void +handle_non_permuting_mem_insn (swap_web_entry *insn_entry, unsigned int i) +{ + rtx_insn *insn = insn_entry[i].insn; + if (insn_entry[i].special_handling == SH_NOSWAP_LD) +permute_load (insn); + else if (insn_entry[i].special_handling == SH_NOSWAP_ST) +permute_store (insn); +} + /* Main entry point for this pass. */ unsigned int rs6000_analyze_swaps (function *fun) @@ -2624,25 +2642,56 @@ rs6000_analyze_swaps (function *fun) dump_swap_insn_table (insn_entry); } - /* For each load and store in an optimizable web (which implies - the loads and stores are permuting), find the associated - register swaps and mark them for removal. Due to various - optimizations we may mark the same swap more than once. Also - perform special handling for swappable insns that require it. */ + /* There are two kinds of optimizations tha
[PATCH 1/2] testsuite: Add and use thread_fence effective-target
Some targets like arm-eabi with newlib and default settings rely on __sync_synchronize() to ensure synchronization. Newlib does not implement it by default, to make users aware they have to take special care. This makes a few tests fail to link. This patch adds a new thread_fence effective target (similar to the corresponding one in libstdc++ testsuite), and uses it in the tests that need it, making them UNSUPPORTED instead of FAIL and UNRESOLVED. 2023-09-10 Christophe Lyon gcc/ * doc/sourcebuild.texi (Other attributes): Document thread_fence effective-target. gcc/testsuite/ * g++.dg/init/array54.C: Require thread_fence. * gcc.dg/c2x-nullptr-1.c: Likewise. * gcc.dg/pr103721-2.c: Likewise. * lib/target-supports.exp (check_effective_target_thread_fence): New. --- gcc/doc/sourcebuild.texi | 4 gcc/testsuite/g++.dg/init/array54.C | 1 + gcc/testsuite/gcc.dg/c2x-nullptr-1.c | 1 + gcc/testsuite/gcc.dg/pr103721-2.c | 1 + gcc/testsuite/lib/target-supports.exp | 12 5 files changed, 19 insertions(+) diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 1a78b3c1abb..a5f61c29f3b 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2860,6 +2860,10 @@ Compiler has been configured to support link-time optimization (LTO). Compiler and linker support link-time optimization relocatable linking with @option{-r} and @option{-flto} options. +@item thread_fence +Target implements @code{__atomic_thread_fence} without relying on +non-implemented @code{__sync_synchronize()}. + @item naked_functions Target supports the @code{naked} function attribute. 
diff --git a/gcc/testsuite/g++.dg/init/array54.C b/gcc/testsuite/g++.dg/init/array54.C index f6be350ba72..5241e451d6d 100644 --- a/gcc/testsuite/g++.dg/init/array54.C +++ b/gcc/testsuite/g++.dg/init/array54.C @@ -1,5 +1,6 @@ // PR c++/90947 // { dg-do run { target c++11 } } +// { dg-require-effective-target thread_fence } #include diff --git a/gcc/testsuite/gcc.dg/c2x-nullptr-1.c b/gcc/testsuite/gcc.dg/c2x-nullptr-1.c index 4e440234d52..97a31c27409 100644 --- a/gcc/testsuite/gcc.dg/c2x-nullptr-1.c +++ b/gcc/testsuite/gcc.dg/c2x-nullptr-1.c @@ -1,5 +1,6 @@ /* Test valid usage of C23 nullptr. */ /* { dg-do run } */ +// { dg-require-effective-target thread_fence } /* { dg-options "-std=c2x -pedantic-errors -Wall -Wextra -Wno-unused-variable" } */ #include diff --git a/gcc/testsuite/gcc.dg/pr103721-2.c b/gcc/testsuite/gcc.dg/pr103721-2.c index aefa1f0f147..e059b1cfc2d 100644 --- a/gcc/testsuite/gcc.dg/pr103721-2.c +++ b/gcc/testsuite/gcc.dg/pr103721-2.c @@ -1,4 +1,5 @@ // { dg-do run } +// { dg-require-effective-target thread_fence } // { dg-options "-O2" } extern void abort (); diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index d353cc0aaf0..7ac9e7530cc 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -9107,6 +9107,18 @@ proc check_effective_target_sync_char_short { } { || [check_effective_target_mips_llsc] }}] } +# Return 1 if thread_fence does not rely on __sync_synchronize +# library function + +proc check_effective_target_thread_fence {} { +return [check_no_compiler_messages thread_fence executable { + int main () { + __atomic_thread_fence (__ATOMIC_SEQ_CST); + return 0; + } +} ""] +} + # Return 1 if the target uses a ColdFire FPU. proc check_effective_target_coldfire_fpu { } { -- 2.34.1
[PATCH 2/2] libstdc++: Add dg-require-thread-fence in several tests
Some targets like arm-eabi with newlib and default settings rely on __sync_synchronize() to ensure synchronization. Newlib does not implement it by default, to make users aware they have to take special care. This makes a few tests fail to link. This patch requires the missing thread-fence effective target in the tests that need it, making them UNSUPPORTED instead of FAIL and UNRESOLVED. 2023-09-10 Christophe Lyon libstdc++-v3/ * testsuite/20_util/to_address/debug.cc: Require thread-fence effective target. * testsuite/21_strings/basic_string/cons/char/self_move.cc: Likewise. * testsuite/21_strings/basic_string/debug/1_neg.cc: Likewise. * testsuite/21_strings/basic_string/debug/2_neg.cc: Likewise. * testsuite/21_strings/basic_string/debug/find1_neg.cc: Likewise. * testsuite/21_strings/basic_string/debug/find2_neg.cc: Likewise. * testsuite/21_strings/basic_string/hash/debug.cc: Likewise. * testsuite/21_strings/basic_string/requirements/citerators.cc: Likewise. * testsuite/21_strings/basic_string/requirements/exception/basic.cc: Likewise. * testsuite/21_strings/basic_string/requirements/exception/generation_prohibited.cc: Likewise. * testsuite/21_strings/basic_string/requirements/exception/propagation_consistent.cc: Likewise. * testsuite/21_strings/debug/shrink_to_fit.cc: Likewise. * testsuite/23_containers/array/debug/back1_neg.cc: Likewise. * testsuite/23_containers/array/debug/back2_neg.cc: Likewise. * testsuite/23_containers/array/debug/front1_neg.cc: Likewise. * testsuite/23_containers/array/debug/front2_neg.cc: Likewise. * testsuite/23_containers/array/debug/square_brackets_operator1_neg.cc: Likewise. * testsuite/23_containers/array/debug/square_brackets_operator2_neg.cc: Likewise. * testsuite/23_containers/deque/cons/self_move.cc: Likewise. * testsuite/23_containers/deque/debug/98466.cc: Likewise. * testsuite/23_containers/deque/debug/assign4_neg.cc: Likewise. * testsuite/23_containers/deque/debug/construct4_neg.cc: Likewise. 
* testsuite/23_containers/deque/debug/insert4_neg.cc: Likewise. * testsuite/23_containers/deque/debug/invalidation/1.cc: Likewise. * testsuite/23_containers/deque/debug/invalidation/2.cc: Likewise. * testsuite/23_containers/deque/debug/invalidation/3.cc: Likewise. * testsuite/23_containers/deque/debug/invalidation/4.cc: Likewise. * testsuite/23_containers/forward_list/cons/self_move.cc: Likewise. * testsuite/23_containers/forward_list/debug/construct4_neg.cc: Likewise. * testsuite/23_containers/forward_list/debug/move_assign_neg.cc: Likewise. * testsuite/23_containers/forward_list/debug/move_neg.cc: Likewise. * testsuite/23_containers/list/cons/self_move.cc: Likewise. * testsuite/23_containers/list/debug/assign4_neg.cc: Likewise. * testsuite/23_containers/list/debug/construct4_neg.cc: Likewise. * testsuite/23_containers/list/debug/insert4_neg.cc: Likewise. * testsuite/23_containers/list/debug/invalidation/1.cc: Likewise. * testsuite/23_containers/list/debug/invalidation/2.cc: Likewise. * testsuite/23_containers/list/debug/invalidation/3.cc: Likewise. * testsuite/23_containers/list/debug/invalidation/4.cc: Likewise. * testsuite/23_containers/map/debug/construct4_neg.cc: Likewise. * testsuite/23_containers/map/debug/construct5_neg.cc: Likewise. * testsuite/23_containers/map/debug/insert4_neg.cc: Likewise. * testsuite/23_containers/map/debug/invalidation/1.cc: Likewise. * testsuite/23_containers/map/debug/invalidation/2.cc: Likewise. * testsuite/23_containers/map/debug/move_assign_neg.cc: Likewise. * testsuite/23_containers/map/debug/move_neg.cc: Likewise. * testsuite/23_containers/map/modifiers/erase/end_neg.cc: Likewise. * testsuite/23_containers/map/modifiers/insert/16813.cc: Likewise. * testsuite/23_containers/multimap/debug/construct4_neg.cc: Likewise. * testsuite/23_containers/multimap/debug/construct5_neg.cc: Likewise. * testsuite/23_containers/multimap/debug/insert4_neg.cc: Likewise. * testsuite/23_containers/multimap/debug/invalidation/1.cc: Likewise. 
* testsuite/23_containers/multimap/debug/invalidation/2.cc: Likewise. * testsuite/23_containers/multimap/debug/move_assign_neg.cc: Likewise. * testsuite/23_containers/multimap/debug/move_neg.cc: Likewise. * testsuite/23_containers/multiset/debug/construct4_neg.cc: Likewise. * testsuite/23_containers/multiset/debug/construct5_neg.cc: Likewise. * testsuite/23_containers/multiset/debug/insert4_neg.cc: Likewise. * testsuite/23_containers/multiset/debug/invalidation/1.cc: Likewise. * testsuite/23_containers/multiset/debug
RE: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm
Committed, thanks Jeff. Pan -Original Message- From: Gcc-patches On Behalf Of Jeff Law via Gcc-patches Sent: Sunday, September 10, 2023 11:25 PM To: Juzhe-Zhong ; gcc-patches@gcc.gnu.org Cc: kito.ch...@sifive.com; kito.ch...@gmail.com Subject: Re: [PATCH V2] RISC-V: Avoid unnecessary slideup in compress pattern of vec_perm On 9/10/23 08:07, Juzhe-Zhong wrote: > gcc/ChangeLog: > > * config/riscv/riscv-v.cc (shuffle_compress_patterns): Avoid > unnecessary slideup. OK jeff
RE: [PATCH] RISC-V: Expand fixed-vlmax/vls vector permutation in targethook
Committed, thanks Jeff. Pan -Original Message- From: Gcc-patches On Behalf Of Jeff Law via Gcc-patches Sent: Sunday, September 10, 2023 9:38 PM To: Juzhe-Zhong ; gcc-patches@gcc.gnu.org Cc: kito.ch...@sifive.com; kito.ch...@gmail.com Subject: Re: [PATCH] RISC-V: Expand fixed-vlmax/vls vector permutation in targethook On 9/9/23 20:33, Juzhe-Zhong wrote: > When debugging FAIL: gcc.dg/pr92301.c execution test. > Realize a vls vector permutation situation failed to vectorize since early > return false: > > - /* For constant size indices, we dont't need to handle it here. > - Just leave it to vec_perm. */ > - if (d->perm.length ().is_constant ()) > -return false; > > To avoid more potential failed vectorization case. Now expand it in > targethook. > > gcc/ChangeLog: > > * config/riscv/riscv-v.cc (shuffle_generic_patterns): Expand > fixed-vlmax/vls vector permutation. OK. jeff
[PATCH] Remove constraint modifier % for fcmaddcph/fmaddcph/fcmulcph since they're not commutative.
Here's the patch I've commited. The patch also remove % for vfmaddcph. gcc/ChangeLog: PR target/111306 PR target/111335 * config/i386/sse.md (int_comm): New int_attr. (fma__): Remove % for Complex conjugate operations since they're not commutative. (fma___pair): Ditto. (___mask): Ditto. (cmul3): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr111306.c: New test. --- gcc/config/i386/sse.md | 16 --- gcc/testsuite/gcc.target/i386/pr111306.c | 36 2 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr111306.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 6d3ae8dea0c..14615999394 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -6480,6 +6480,14 @@ (define_int_attr complexpairopname [(UNSPEC_COMPLEX_FMA_PAIR "fmaddc") (UNSPEC_COMPLEX_FCMA_PAIR "fcmaddc")]) +(define_int_attr int_comm + [(UNSPEC_COMPLEX_FMA "") +(UNSPEC_COMPLEX_FMA_PAIR "") +(UNSPEC_COMPLEX_FCMA "") +(UNSPEC_COMPLEX_FCMA_PAIR "") +(UNSPEC_COMPLEX_FMUL "%") +(UNSPEC_COMPLEX_FCMUL "")]) + (define_int_attr conj_op [(UNSPEC_COMPLEX_FMA "") (UNSPEC_COMPLEX_FCMA "_conj") @@ -6593,7 +6601,7 @@ (define_expand "cmla4" (define_insn "fma__" [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=&v") (unspec:VHF_AVX512VL - [(match_operand:VHF_AVX512VL 1 "" "%v") + [(match_operand:VHF_AVX512VL 1 "" "v") (match_operand:VHF_AVX512VL 2 "" "") (match_operand:VHF_AVX512VL 3 "" "0")] UNSPEC_COMPLEX_F_C_MA))] @@ -6658,7 +,7 @@ (define_insn_and_split "fma___fma_zero" (define_insn "fma___pair" [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=&v") (unspec:VF1_AVX512VL -[(match_operand:VF1_AVX512VL 1 "vector_operand" "%v") +[(match_operand:VF1_AVX512VL 1 "vector_operand" "v") (match_operand:VF1_AVX512VL 2 "bcst_vector_operand" "vmBr") (match_operand:VF1_AVX512VL 3 "vector_operand" "0")] UNSPEC_COMPLEX_F_C_MA_PAIR))] @@ -6727,7 +6735,7 @@ (define_insn "___mask" [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=&v") 
(vec_merge:VHF_AVX512VL (unspec:VHF_AVX512VL - [(match_operand:VHF_AVX512VL 1 "nonimmediate_operand" "%v") + [(match_operand:VHF_AVX512VL 1 "nonimmediate_operand" "v") (match_operand:VHF_AVX512VL 2 "nonimmediate_operand" "") (match_operand:VHF_AVX512VL 3 "register_operand" "0")] UNSPEC_COMPLEX_F_C_MA) @@ -6752,7 +6760,7 @@ (define_expand "cmul3" (define_insn "__" [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=&v") (unspec:VHF_AVX512VL - [(match_operand:VHF_AVX512VL 1 "nonimmediate_operand" "%v") + [(match_operand:VHF_AVX512VL 1 "nonimmediate_operand" "v") (match_operand:VHF_AVX512VL 2 "nonimmediate_operand" "")] UNSPEC_COMPLEX_F_C_MUL))] "TARGET_AVX512FP16 && " diff --git a/gcc/testsuite/gcc.target/i386/pr111306.c b/gcc/testsuite/gcc.target/i386/pr111306.c new file mode 100644 index 000..541725ebdad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr111306.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ +/* { dg-require-effective-target avx512fp16 } */ + +#define AVX512FP16 +#include "avx512f-helper.h" + +__attribute__((optimize("O2"),noipa)) +void func1(_Float16 *a, _Float16 *b, int n, _Float16 *c) { + __m512h rA = _mm512_loadu_ph(a); + for (int i = 0; i < n; i += 32) { +__m512h rB = _mm512_loadu_ph(b + i); +_mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA)); + } +} + +void +test_512 (void) +{ + int n = 32; + _Float16 a[n], b[n], c[n]; + _Float16 exp[n]; + for (int i = 1; i <= n; i++) { +a[i - 1] = i & 1 ? -i : i; +b[i - 1] = i; + } + + func1(a, b, n, c); + for (int i = 0; i < n / 32; i += 2) { +if (c[i] != a[i] * b[i] + a[i+1] * b[i+1] + || c[i+1] != a[i] * b[i+1] - a[i+1]*b[i]) + __builtin_abort (); +} +} + + -- 2.31.1
Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]
On Thu, Sep 7, 2023 at 1:28 PM David Malcolm wrote: > On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote: > > > Hi Dave, > > Hi Eric, thanks for the patch. > > > > > Recently I've been working on symbolic value support for the reference > > count checker. I've attached a patch for it below; let me know it looks > > OK for trunk. Thanks! > > > > Best, > > Eric > > > > --- > > > > This patch enhances the reference count checker in the CPython plugin by > > adding support for symbolic values. Whereas previously we were only able > > to check the reference count of PyObject* objects created in the scope > > of the function; we are now able to emit diagnostics on reference count > > mismatch of objects that were, for example, passed in as a function > > parameter. > > > > rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but > ob_refcnt field is N + ‘2’ > > 6 | return obj; > > | ^~~ > > [...snip...] > > > create mode 100644 > gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c > > > > diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c > b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c > > index bf1982e79c3..d7ecd7fce09 100644 > > --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c > > +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c > > @@ -314,17 +314,20 @@ public: > >{ > > diagnostic_metadata m; > > bool warned; > > -// just assuming constants for now > > -auto actual_refcnt > > - = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant (); > > -auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue > ()->get_constant (); > > -warned = warning_meta (rich_loc, m, get_controlling_option (), > > -"expected %qE to have " > > -"reference count: %qE but ob_refcnt field is: > %qE", > > -m_reg_tree, actual_refcnt, ob_refcnt); > > - > > -// location_t loc = rich_loc->get_loc (); > > -// foo (loc); > > + > > +const auto *actual_refcnt_constant > > + = m_actual_refcnt->dyn_cast_constant_svalue (); > > +const 
auto *ob_refcnt_constant = > m_ob_refcnt->dyn_cast_constant_svalue (); > > +if (!actual_refcnt_constant || !ob_refcnt_constant) > > + return false; > > + > > +auto actual_refcnt = actual_refcnt_constant->get_constant (); > > +auto ob_refcnt = ob_refcnt_constant->get_constant (); > > +warned = warning_meta ( > > + rich_loc, m, get_controlling_option (), > > + "expected %qE to have " > > + "reference count: N + %qE but ob_refcnt field is N + %qE", > > + m_reg_tree, actual_refcnt, ob_refcnt); > > return warned; > > I know you're emulating the old behavior I implemented way back in > cpychecker, but I don't like that behavior :( > > Specifically, although the patch improves the behavior for symbolic > values, it regresses the precision of wording for the concrete values > case. If we have e.g. a concrete ob_refcnt of 2, whereas we only have > 1 pointer, then it's more readable to say: > > warning: expected ‘obj’ to have reference count: ‘1’ but ob_refcnt > field is ‘2’ > > than: > > warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt > field is N + ‘2’ > > ...and we shouldn't quote concrete numbers, the message should be: > > warning: expected ‘obj’ to have reference count of 1 but ob_refcnt field > is 2 > or better: > > warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field is 2 > > > Can you move the unwrapping of the svalue from the tests below into the > emit vfunc? That way the m_actual_refcnt doesn't have to be a > constant_svalue; you could have logic in the emit vfunc to print > readable messages based on what kind of svalue it is. 
> > Rather than 'N', it might be better to say 'initial'; how about: > > warning: ‘*obj’ is pointed to by 0 additional pointers but 'ob_refcnt' > field has increased by 1 > warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' > field has increased by 2 > warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' > field is unchanged > warning: ‘*obj’ is pointed to by 2 additional pointers but 'ob_refcnt' > field has decreased by 1 > warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt' field > is unchanged > > and similar? > That makes sense to me as well (indeed I was just emulating the old behavior)! Will experiment and keep you posted on a revised patch with this in mind. This is somewhat of a minor detail but can we emit ‘*obj’ as bolded text in the diagnostic message? Currently, I can emit this (including the asterisk) like so: '*%E'. But unlike using %qE, it doesn't bold the body of the single quotations. Is this possible? > > Maybe have a flag that tracks whether we're talking about a concrete > value that's absolute versus a concrete value that's relative to the > initial value? > > > [...snip...] > > > > @@ -369,6 +368,19 @@ i
[PATCH] MATCH: [PR111346] `X CMP MINMAX` pattern missing :c on CMP
I noticed this while working on other MINMAX optimizations. It was hard to find a simplified testcase though because it was dependent on the ssa name versions. Adding the `:c` to cmp allows the pattern to be match for the case where minmax as the first operand of the comparison rather than the second. Committed as obvious after a bootstrap/test on x86_64-linux-gnu. PR tree-optimization/111346 gcc/ChangeLog: * match.pd (`X CMP MINMAX`): Add `:c` on the cmp part of the pattern gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/minmaxcmp-1.c: New test. --- gcc/match.pd| 2 +- gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-1.c | 39 + 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-1.c diff --git a/gcc/match.pd b/gcc/match.pd index c7b6db4b543..a60fe04885e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3942,7 +3942,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (for minmax (min min max max ) cmp(ge lt le gt ) (simplify - (cmp @0 (minmax:c @0 @1)) + (cmp:c @0 (minmax:c @0 @1)) { constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); } )) /* Undo fancy ways of writing max/min or other ?: expressions, like diff --git a/gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-1.c b/gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-1.c new file mode 100644 index 000..0706c026076 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/minmaxcmp-1.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-original" } */ +/* PR tree-optimization/111346 */ + +int f(); +int g(); + +_Bool test1(int a, int b) +{ +return ((a > b) ? a : b) >= a; // return 1; +} +_Bool test1_(int a, int b) +{ +return a <= ((a > b) ? a : b); // return 1; +} +/* test1 and test1_ should be able to optimize to `return 1;` during fold. 
*/ +/* { dg-final { scan-tree-dump-times "return 1;" 2 "original" } } */ +/* { dg-final { scan-tree-dump-not " MAX_EXPR " "original" } } */ + +_Bool test2(int a, int b) +{ +a = f(); +a = g(); +int t = a; +if (t < b) t = b; +return t >= a; // return 1; +} + +_Bool test2_(int a, int b) +{ +a = g(); +int t = a; +if (t < b) t = b; +return t >= a; // return 1; +} + +/* All of these should be optimized to just be the function calls and `return 1;` */ +/* { dg-final { scan-tree-dump-times "return 1;" 4 "optimized" } } */ +/* { dg-final { scan-tree-dump-not " MAX_EXPR " "optimized" } } */ -- 2.31.1
Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]
> diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md > index d208b418e5f..6f48f7d6232 100644 > --- a/gcc/config/riscv/autovec-vls.md > +++ b/gcc/config/riscv/autovec-vls.md > @@ -148,6 +148,14 @@ >[(set_attr "type" "vmov") > (set_attr "mode" "")]) > > +(define_insn "*mov_vls" > + [(set (match_operand:VLSB 0 "register_operand" "=vr") > + (match_operand:VLSB 1 "register_operand" " vr"))] > + "TARGET_VECTOR" > + "vmv1r.v\t%0,%1" > + [(set_attr "type" "vmov") > + (set_attr "mode" "")]) Should we also add loads and stores as well? and just make sure this is also necessary for the fix and not sneaky, right? > + > (define_expand "movmisalign" >[(set (match_operand:VLS 0 "nonimmediate_operand") > (match_operand:VLS 1 "general_operand"))]
Re: [PATCH] MATCH: [PR111346] `X CMP MINMAX` pattern missing :c on CMP
On 9/10/23 20:18, Andrew Pinski via Gcc-patches wrote: I noticed this while working on other MINMAX optimizations. It was hard to find a simplified testcase though because it was dependent on the ssa name versions. Adding the `:c` to cmp allows the pattern to be match for the case where minmax as the first operand of the comparison rather than the second. Committed as obvious after a bootstrap/test on x86_64-linux-gnu. PR tree-optimization/111346 gcc/ChangeLog: * match.pd (`X CMP MINMAX`): Add `:c` on the cmp part of the pattern gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/minmaxcmp-1.c: New test. OK jeff
Re: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]
>> Should we also add loads and stores as well? >> and just make sure this is also necessary for the fix and not sneaky, right? No, we don't need loads/stores. Since this following handling codes: (define_insn_and_split "*mov_lra" [(set (match_operand:VLS_AVL_REG 0 "reg_or_mem_operand" "=vr, m,vr") (match_operand:VLS_AVL_REG 1 "reg_or_mem_operand" " m,vr,vr")) (clobber (match_scratch:P 2 "=&r,&r,X"))] "TARGET_VECTOR && (lra_in_progress || reload_completed) && (register_operand (operands[0], mode) || register_operand (operands[1], mode))" "#" "&& reload_completed" [(const_int 0)] { if (REG_P (operands[0]) && REG_P (operands[1])) emit_insn (gen_rtx_SET (operands[0], operands[1])); else { emit_move_insn (operands[2], gen_int_mode (GET_MODE_NUNITS (mode), Pmode)); unsigned insn_flags = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL ? riscv_vector::UNARY_MASK_OP : riscv_vector::UNARY_OP; riscv_vector::emit_nonvlmax_insn (code_for_pred_mov (mode), insn_flags, operands, operands[2]); } DONE; } [(set_attr "type" "vmov")] ) We split special case use emit_insn (gen_rtx_SET (operands[0], operands[1])); Missing this pattern will cause ICE but current testcases didn't produce such issues. This issue is recognized after I support this pattern. juzhe.zh...@rivai.ai From: Kito Cheng Date: 2023-09-11 10:18 To: Juzhe-Zhong CC: gcc-patches; kito.cheng Subject: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311] > diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md > index d208b418e5f..6f48f7d6232 100644 > --- a/gcc/config/riscv/autovec-vls.md > +++ b/gcc/config/riscv/autovec-vls.md > @@ -148,6 +148,14 @@ >[(set_attr "type" "vmov") > (set_attr "mode" "")]) > > +(define_insn "*mov_vls" > + [(set (match_operand:VLSB 0 "register_operand" "=vr") > + (match_operand:VLSB 1 "register_operand" " vr"))] > + "TARGET_VECTOR" > + "vmv1r.v\t%0,%1" > + [(set_attr "type" "vmov") > + (set_attr "mode" "")]) Should we also add loads and stores as well? 
and just make sure this is also necessary for the fix and not sneaky, right? > + > (define_expand "movmisalign" >[(set (match_operand:VLS 0 "nonimmediate_operand") > (match_operand:VLS 1 "general_operand"))]
Re: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]
OK, but could you split this patch into two patches? pre-approved for both. On Mon, Sep 11, 2023 at 10:36 AM juzhe.zh...@rivai.ai wrote: > > >> Should we also add loads and stores as well? > >> and just make sure this is also necessary for the fix and not sneaky, > >> right? > > No, we don't need loads/stores. Since this following handling codes: > (define_insn_and_split "*mov_lra" > [(set (match_operand:VLS_AVL_REG 0 "reg_or_mem_operand" "=vr, m,vr") > (match_operand:VLS_AVL_REG 1 "reg_or_mem_operand" " m,vr,vr")) >(clobber (match_scratch:P 2 "=&r,&r,X"))] > "TARGET_VECTOR && (lra_in_progress || reload_completed) >&& (register_operand (operands[0], mode) >|| register_operand (operands[1], mode))" > "#" > "&& reload_completed" > [(const_int 0)] > { > if (REG_P (operands[0]) && REG_P (operands[1])) > emit_insn (gen_rtx_SET (operands[0], operands[1])); > else > { > emit_move_insn (operands[2], gen_int_mode (GET_MODE_NUNITS > (mode), > Pmode)); > unsigned insn_flags > = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL > ? riscv_vector::UNARY_MASK_OP > : riscv_vector::UNARY_OP; > riscv_vector::emit_nonvlmax_insn (code_for_pred_mov > (mode), > insn_flags, operands, operands[2]); > } > DONE; > } > [(set_attr "type" "vmov")] > ) > > We split special case use emit_insn (gen_rtx_SET (operands[0], operands[1])); > > Missing this pattern will cause ICE but current testcases didn't produce such > issues. > This issue is recognized after I support this pattern. 
> > > > juzhe.zh...@rivai.ai > > From: Kito Cheng > Date: 2023-09-11 10:18 > To: Juzhe-Zhong > CC: gcc-patches; kito.cheng > Subject: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311] > > diff --git a/gcc/config/riscv/autovec-vls.md > > b/gcc/config/riscv/autovec-vls.md > > index d208b418e5f..6f48f7d6232 100644 > > --- a/gcc/config/riscv/autovec-vls.md > > +++ b/gcc/config/riscv/autovec-vls.md > > @@ -148,6 +148,14 @@ > >[(set_attr "type" "vmov") > > (set_attr "mode" "")]) > > > > +(define_insn "*mov_vls" > > + [(set (match_operand:VLSB 0 "register_operand" "=vr") > > + (match_operand:VLSB 1 "register_operand" " vr"))] > > + "TARGET_VECTOR" > > + "vmv1r.v\t%0,%1" > > + [(set_attr "type" "vmov") > > + (set_attr "mode" "")]) > > Should we also add loads and stores as well? > and just make sure this is also necessary for the fix and not sneaky, right? > > > + > > (define_expand "movmisalign" > >[(set (match_operand:VLS 0 "nonimmediate_operand") > > (match_operand:VLS 1 "general_operand"))] >
[Committed] RISC-V: Add missing VLS mask bool mode reg -> reg patterns
Committed. gcc/ChangeLog: * config/riscv/autovec-vls.md (*mov_vls): New pattern. * config/riscv/vector-iterators.md: New iterator --- gcc/config/riscv/autovec-vls.md | 8 gcc/config/riscv/vector-iterators.md | 15 +++ 2 files changed, 23 insertions(+) diff --git a/gcc/config/riscv/autovec-vls.md b/gcc/config/riscv/autovec-vls.md index d208b418e5f..6f48f7d6232 100644 --- a/gcc/config/riscv/autovec-vls.md +++ b/gcc/config/riscv/autovec-vls.md @@ -148,6 +148,14 @@ [(set_attr "type" "vmov") (set_attr "mode" "")]) +(define_insn "*mov_vls" + [(set (match_operand:VLSB 0 "register_operand" "=vr") + (match_operand:VLSB 1 "register_operand" " vr"))] + "TARGET_VECTOR" + "vmv1r.v\t%0,%1" + [(set_attr "type" "vmov") + (set_attr "mode" "")]) + (define_expand "movmisalign" [(set (match_operand:VLS 0 "nonimmediate_operand") (match_operand:VLS 1 "general_operand"))] diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index a98ed9fcbb6..5694c0c8f37 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -2425,6 +2425,21 @@ (V256DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 2048") (V512DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 4096")]) +(define_mode_iterator VLSB [ + (V1BI "TARGET_VECTOR_VLS") + (V2BI "TARGET_VECTOR_VLS") + (V4BI "TARGET_VECTOR_VLS") + (V8BI "TARGET_VECTOR_VLS") + (V16BI "TARGET_VECTOR_VLS") + (V32BI "TARGET_VECTOR_VLS") + (V64BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 64") + (V128BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 128") + (V256BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 256") + (V512BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 512") + (V1024BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 1024") + (V2048BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 2048") + (V4096BI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 4096")]) + ;; VLS modes that has NUNITS < 32. (define_mode_iterator VLS_AVL_IMM [ (V1QI "TARGET_VECTOR_VLS") -- 2.36.3
Re: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311]
Sure. Thanks kito. juzhe.zh...@rivai.ai From: Kito Cheng Date: 2023-09-11 10:57 To: juzhe.zh...@rivai.ai CC: gcc-patches; Kito.cheng Subject: Re: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311] OK, but could you split this patch into two patches? pre-approved for both. On Mon, Sep 11, 2023 at 10:36 AM juzhe.zh...@rivai.ai wrote: > > >> Should we also add loads and stores as well? > >> and just make sure this is also necessary for the fix and not sneaky, > >> right? > > No, we don't need loads/stores. Since this following handling codes: > (define_insn_and_split "*mov_lra" > [(set (match_operand:VLS_AVL_REG 0 "reg_or_mem_operand" "=vr, m,vr") > (match_operand:VLS_AVL_REG 1 "reg_or_mem_operand" " m,vr,vr")) >(clobber (match_scratch:P 2 "=&r,&r,X"))] > "TARGET_VECTOR && (lra_in_progress || reload_completed) >&& (register_operand (operands[0], mode) >|| register_operand (operands[1], mode))" > "#" > "&& reload_completed" > [(const_int 0)] > { > if (REG_P (operands[0]) && REG_P (operands[1])) > emit_insn (gen_rtx_SET (operands[0], operands[1])); > else > { > emit_move_insn (operands[2], gen_int_mode (GET_MODE_NUNITS > (mode), > Pmode)); > unsigned insn_flags > = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL > ? riscv_vector::UNARY_MASK_OP > : riscv_vector::UNARY_OP; > riscv_vector::emit_nonvlmax_insn (code_for_pred_mov > (mode), > insn_flags, operands, operands[2]); > } > DONE; > } > [(set_attr "type" "vmov")] > ) > > We split special case use emit_insn (gen_rtx_SET (operands[0], operands[1])); > > Missing this pattern will cause ICE but current testcases didn't produce such > issues. > This issue is recognized after I support this pattern. 
> > > > juzhe.zh...@rivai.ai > > From: Kito Cheng > Date: 2023-09-11 10:18 > To: Juzhe-Zhong > CC: gcc-patches; kito.cheng > Subject: Re: [PATCH] RISC-V: Add VLS modes VEC_PERM support[PR111311] > > diff --git a/gcc/config/riscv/autovec-vls.md > > b/gcc/config/riscv/autovec-vls.md > > index d208b418e5f..6f48f7d6232 100644 > > --- a/gcc/config/riscv/autovec-vls.md > > +++ b/gcc/config/riscv/autovec-vls.md > > @@ -148,6 +148,14 @@ > >[(set_attr "type" "vmov") > > (set_attr "mode" "")]) > > > > +(define_insn "*mov_vls" > > + [(set (match_operand:VLSB 0 "register_operand" "=vr") > > + (match_operand:VLSB 1 "register_operand" " vr"))] > > + "TARGET_VECTOR" > > + "vmv1r.v\t%0,%1" > > + [(set_attr "type" "vmov") > > + (set_attr "mode" "")]) > > Should we also add loads and stores as well? > and just make sure this is also necessary for the fix and not sneaky, right? > > > + > > (define_expand "movmisalign" > >[(set (match_operand:VLS 0 "nonimmediate_operand") > > (match_operand:VLS 1 "general_operand"))] >
[Committed V2] RISC-V: Add VLS modes VEC_PERM support[PR111311]
This patch add VLS modes VEC_PERM support which fix these following FAILs in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111311: FAIL: gcc.dg/tree-ssa/forwprop-40.c scan-tree-dump-times optimized "BIT_FIELD_REF" 0 FAIL: gcc.dg/tree-ssa/forwprop-40.c scan-tree-dump-times optimized "BIT_INSERT_EXPR" 0 FAIL: gcc.dg/tree-ssa/forwprop-41.c scan-tree-dump-times optimized "BIT_FIELD_REF" 0 FAIL: gcc.dg/tree-ssa/forwprop-41.c scan-tree-dump-times optimized "BIT_INSERT_EXPR" 1 These FAILs are fixed after this patch. gcc/ChangeLog: * config/riscv/autovec.md: Add VLS modes. * config/riscv/riscv-protos.h (cmp_lmul_le_one): New function. (cmp_lmul_gt_one): Ditto. * config/riscv/riscv-v.cc (cmp_lmul_le_one): Ditto. (cmp_lmul_gt_one): Ditto. * config/riscv/riscv.cc (riscv_print_operand): Add VLS modes. (riscv_vectorize_vec_perm_const): Ditto. * config/riscv/vector-iterators.md: Ditto. * config/riscv/vector.md: Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/partial/slp-1.c: Adapt test. * gcc.target/riscv/rvv/autovec/partial/slp-16.c: Ditto. * gcc.target/riscv/rvv/autovec/partial/slp-17.c: Ditto. * gcc.target/riscv/rvv/autovec/partial/slp-3.c: Ditto. * gcc.target/riscv/rvv/autovec/partial/slp-5.c: Ditto. * gcc.target/riscv/rvv/autovec/vls/compress-1.c: New test. * gcc.target/riscv/rvv/autovec/vls/compress-2.c: New test. * gcc.target/riscv/rvv/autovec/vls/compress-3.c: New test. * gcc.target/riscv/rvv/autovec/vls/compress-4.c: New test. * gcc.target/riscv/rvv/autovec/vls/compress-5.c: New test. * gcc.target/riscv/rvv/autovec/vls/compress-6.c: New test. * gcc.target/riscv/rvv/autovec/vls/merge-1.c: New test. * gcc.target/riscv/rvv/autovec/vls/merge-2.c: New test. * gcc.target/riscv/rvv/autovec/vls/merge-3.c: New test. * gcc.target/riscv/rvv/autovec/vls/merge-4.c: New test. * gcc.target/riscv/rvv/autovec/vls/merge-5.c: New test. * gcc.target/riscv/rvv/autovec/vls/merge-6.c: New test. * gcc.target/riscv/rvv/autovec/vls/merge-7.c: New test. 
* gcc.target/riscv/rvv/autovec/vls/perm-1.c: New test. * gcc.target/riscv/rvv/autovec/vls/perm-2.c: New test. * gcc.target/riscv/rvv/autovec/vls/perm-3.c: New test. * gcc.target/riscv/rvv/autovec/vls/perm-4.c: New test. * gcc.target/riscv/rvv/autovec/vls/perm-5.c: New test. * gcc.target/riscv/rvv/autovec/vls/perm-6.c: New test. * gcc.target/riscv/rvv/autovec/vls/perm-7.c: New test. --- gcc/config/riscv/autovec.md | 6 +- gcc/config/riscv/riscv-protos.h | 2 + gcc/config/riscv/riscv-v.cc | 22 ++ gcc/config/riscv/riscv.cc | 4 +- gcc/config/riscv/vector-iterators.md | 289 - gcc/config/riscv/vector.md| 302 +- .../riscv/rvv/autovec/partial/slp-1.c | 2 +- .../riscv/rvv/autovec/partial/slp-16.c| 2 +- .../riscv/rvv/autovec/partial/slp-17.c| 2 +- .../riscv/rvv/autovec/partial/slp-3.c | 2 +- .../riscv/rvv/autovec/partial/slp-5.c | 2 +- .../riscv/rvv/autovec/vls/compress-1.c| 6 + .../riscv/rvv/autovec/vls/compress-2.c| 7 + .../riscv/rvv/autovec/vls/compress-3.c| 7 + .../riscv/rvv/autovec/vls/compress-4.c| 7 + .../riscv/rvv/autovec/vls/compress-5.c| 6 + .../riscv/rvv/autovec/vls/compress-6.c| 6 + .../riscv/rvv/autovec/vls/merge-1.c | 6 + .../riscv/rvv/autovec/vls/merge-2.c | 6 + .../riscv/rvv/autovec/vls/merge-3.c | 6 + .../riscv/rvv/autovec/vls/merge-4.c | 6 + .../riscv/rvv/autovec/vls/merge-5.c | 6 + .../riscv/rvv/autovec/vls/merge-6.c | 6 + .../riscv/rvv/autovec/vls/merge-7.c | 6 + .../gcc.target/riscv/rvv/autovec/vls/perm-1.c | 6 + .../gcc.target/riscv/rvv/autovec/vls/perm-2.c | 6 + .../gcc.target/riscv/rvv/autovec/vls/perm-3.c | 6 + .../gcc.target/riscv/rvv/autovec/vls/perm-4.c | 8 + .../gcc.target/riscv/rvv/autovec/vls/perm-5.c | 6 + .../gcc.target/riscv/rvv/autovec/vls/perm-6.c | 6 + .../gcc.target/riscv/rvv/autovec/vls/perm-7.c | 6 + 31 files changed, 584 insertions(+), 176 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-2.c create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-4.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-5.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/compress-6.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c
[PATCH] RISC-V: Use dominance analysis in global vsetvl elimination
I found that it's more reasonable to use existing dominance analysis. gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (pass_vsetvl::global_eliminate_vsetvl_insn): Use dominance analysis. (pass_vsetvl::init): Ditto. (pass_vsetvl::done): Ditto. --- gcc/config/riscv/riscv-vsetvl.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 134b97737ae..f81361c4ccd 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -4054,7 +4054,7 @@ pass_vsetvl::global_eliminate_vsetvl_insn (const bb_info *bb) const } /* Step1: Reshape the VL/VTYPE status to make sure everything compatible. */ - hash_set pred_cfg_bbs = get_all_predecessors (cfg_bb); + auto_vec pred_cfg_bbs = get_dominated_by (CDI_POST_DOMINATORS, cfg_bb); FOR_EACH_EDGE (e, ei, cfg_bb->preds) { sbitmap avout = m_vector_manager->vector_avout[e->src->index]; @@ -4243,6 +4243,7 @@ pass_vsetvl::init (void) { /* Initialization of RTL_SSA. */ calculate_dominance_info (CDI_DOMINATORS); + calculate_dominance_info (CDI_POST_DOMINATORS); df_analyze (); crtl->ssa = new function_info (cfun); } @@ -4264,6 +4265,7 @@ pass_vsetvl::done (void) { /* Finalization of RTL_SSA. */ free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); if (crtl->ssa->perform_pending_updates ()) cleanup_cfg (0); delete crtl->ssa; -- 2.36.3
Re: [PATCH] RISC-V: Enable RVV scalable vectorization by default[PR111311]
Ping this patch. I think it's time to enable scalable vectorization by default and do the whole regression every time (except vect.exp that we didn't enable yet) Update current FAILs status: Real FAILS (ICE and execution FAIL): FAIL: gcc.dg/pr70252.c (internal compiler error: in gimple_expand_vec_cond_expr, at gimple-isel.cc:284) FAIL: gcc.dg/pr70252.c (test for excess errors) FAIL: gcc.dg/pr92301.c execution test Robin is working on these 3 issues and will be solved soon. FAIL: g++.dg/torture/vshuf-v4df.C -O2 -flto -fno-use-linker-plugin -flto-partition=none (internal compiler error: in as_a, at machmode.h:381) FAIL: g++.dg/torture/vshuf-v4df.C -O2 -flto -fno-use-linker-plugin -flto-partition=none (test for excess errors) FAIL: g++.dg/torture/vshuf-v4df.C -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects (internal compiler error: in as_a, at machmode.h:381) FAIL: g++.dg/torture/vshuf-v4df.C -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects (test for excess errors) This is a long time known issue I have mentioned many times, we need help for LTO since it's caused by mode bits extension. 
The rest bogus FAILs: FAIL: gcc.dg/unroll-8.c scan-rtl-dump loop2_unroll "Not unrolling loop, doesn't roll" FAIL: gcc.dg/unroll-8.c scan-rtl-dump loop2_unroll "likely upper bound: 6" FAIL: gcc.dg/unroll-8.c scan-rtl-dump loop2_unroll "realistic bound: -1" FAIL: gcc.dg/var-expand1.c scan-rtl-dump loop2_unroll "Expanding Accumulator" FAIL: gcc.dg/tree-ssa/cunroll-16.c scan-tree-dump cunroll "optimized: loop with [0-9]+ iterations completely unrolled" FAIL: gcc.dg/tree-ssa/cunroll-16.c scan-tree-dump-not optimized "foo" FAIL: gcc.dg/tree-ssa/forwprop-40.c scan-tree-dump-times optimized "BIT_FIELD_REF" 0 FAIL: gcc.dg/tree-ssa/forwprop-40.c scan-tree-dump-times optimized "BIT_INSERT_EXPR" 0 FAIL: gcc.dg/tree-ssa/forwprop-41.c scan-tree-dump-times optimized "BIT_FIELD_REF" 0 FAIL: gcc.dg/tree-ssa/forwprop-41.c scan-tree-dump-times optimized "BIT_INSERT_EXPR" 1 FAIL: gcc.dg/tree-ssa/gen-vect-11b.c scan-tree-dump-times vect "vectorized 0 loops" 1 FAIL: gcc.dg/tree-ssa/gen-vect-11c.c scan-tree-dump-times vect "vectorized 0 loops" 1 FAIL: gcc.dg/tree-ssa/gen-vect-26.c scan-tree-dump-times vect "Alignment of access forced using peeling" 1 FAIL: gcc.dg/tree-ssa/gen-vect-28.c scan-tree-dump-times vect "Alignment of access forced using peeling" 1 FAIL: gcc.dg/tree-ssa/loop-bound-1.c scan-tree-dump ivopts "bounded by 254" FAIL: gcc.dg/tree-ssa/loop-bound-2.c scan-tree-dump ivopts "bounded by 254" FAIL: gcc.dg/tree-ssa/predcom-2.c scan-tree-dump-times pcom "Unrolling 2 times." 2 FAIL: gcc.dg/tree-ssa/predcom-4.c scan-tree-dump-times pcom "Combination" 1 FAIL: gcc.dg/tree-ssa/predcom-4.c scan-tree-dump-times pcom "Unrolling 3 times." 1 FAIL: gcc.dg/tree-ssa/predcom-5.c scan-tree-dump-times pcom "Combination" 2 FAIL: gcc.dg/tree-ssa/predcom-5.c scan-tree-dump-times pcom "Unrolling 3 times." 
1 FAIL: gcc.dg/tree-ssa/predcom-9.c scan-tree-dump pcom "Executing predictive commoning without unrolling" FAIL: gcc.dg/tree-ssa/reassoc-46.c scan-tree-dump-times optimized "(?:vect_)?sum_[\\d._]+ = (?:(?:vect_)?_[\\d._]+ \\+ (?:vect_)?sum_[\\d._]+|(?:v ect_)?sum_[\\d._]+ \\+ (?:vect_)?_[\\d._]+)" 1 FAIL: gcc.dg/tree-ssa/scev-10.c scan-tree-dump-times ivopts " Type:\\tREFERENCE ADDRESS\n" 1 FAIL: gcc.dg/tree-ssa/scev-11.c scan-tree-dump-times ivopts " Type:\\tREFERENCE ADDRESS\n" 2 FAIL: gcc.dg/tree-ssa/scev-14.c scan-tree-dump ivopts "Overflowness wrto loop niter:\tNo-overflow" FAIL: gcc.dg/tree-ssa/scev-9.c scan-tree-dump-times ivopts " Type:\\tREFERENCE ADDRESS\n" 1 FAIL: gcc.dg/tree-ssa/split-path-11.c scan-tree-dump-times split-paths "join point for if-convertable half-diamond" 1 These are bogus dump FAILs and I have 100% confirm each of them, we are having same behavior as SVE. So is this patch ok for trunk ? juzhe.zh...@rivai.ai From: Juzhe-Zhong Date: 2023-09-07 15:28 To: gcc-patches CC: kito.cheng; kito.cheng; jeffreyalaw; rdapp.gcc; Juzhe-Zhong Subject: [PATCH] RISC-V: Enable RVV scalable vectorization by default[PR111311] This patch is not ready but they all will be fixed very soon. gcc/ChangeLog: * config/riscv/riscv.opt: Set default as scalable vectorization. --- gcc/config/riscv/riscv.opt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 98f342348b7..bf2eca08221 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -292,7 +292,7 @@ EnumValue Enum(riscv_autovec_preference) String(fixed-vlmax) Value(RVV_FIXED_VLMAX) -param=riscv-autovec-preference= -Target RejectNegative Joined Enum(riscv_autovec_preference) Var(riscv_autovec_preference) Init(NO_AUTOVEC) +Target RejectNegative Joined Enum(riscv_autovec_preference) Var(riscv_autovec_preference) Init(RVV_SCALABLE) -param=riscv-autovec-preference= Set the preference of auto-vectorization in the RISC-V port. 
Enum -- 2.36.3
[PATCH v3 0/9] Added support for SX/LSX vector instructions.
v2 -> v3: Standardize the code using the GNU format. In order to better test the function of the vector instruction, the 128 and 256 bit test cases are further split according to the function of the instruction. Xiaolong Chen (9): LoongArch: Add tests of -mstrict-align option. LoongArch: Add testsuite framework for Loongson SX/ASX. LoongArch: Add tests for Loongson SX builtin functions. LoongArch:Added support for SX vector floating-point instructions. LoongArch:Add SX instructions for vector arithmetic addition operations. LoongArch:Add vector subtraction arithmetic operation SX instruction. LoongArch:Add vector arithmetic addition vsadd instruction. LoongArch:Added SX vector arithmetic multiplication instruction. LoongArch:Add SX instructions for vector arithmetic operations other than multiplication, addition, and subtraction. .../gcc.target/loongarch/strict-align.c | 12 + .../loongarch/vector/loongarch-vector.exp | 42 + .../loongarch/vector/lsx/lsx-builtin.c| 5038 + .../loongarch/vector/lsx/lsx-vadd.c | 416 ++ .../loongarch/vector/lsx/lsx-vadda.c | 344 ++ .../loongarch/vector/lsx/lsx-vaddi.c | 251 + .../loongarch/vector/lsx/lsx-vaddwev-1.c | 335 ++ .../loongarch/vector/lsx/lsx-vaddwev-2.c | 344 ++ .../loongarch/vector/lsx/lsx-vaddwev-3.c | 425 ++ .../loongarch/vector/lsx/lsx-vaddwod-1.c | 408 ++ .../loongarch/vector/lsx/lsx-vaddwod-2.c | 344 ++ .../loongarch/vector/lsx/lsx-vaddwod-3.c | 237 + .../loongarch/vector/lsx/lsx-vavg-1.c | 398 ++ .../loongarch/vector/lsx/lsx-vavg-2.c | 308 + .../loongarch/vector/lsx/lsx-vavgr-1.c| 299 + .../loongarch/vector/lsx/lsx-vavgr-2.c| 317 ++ .../loongarch/vector/lsx/lsx-vdiv-1.c | 299 + .../loongarch/vector/lsx/lsx-vdiv-2.c | 254 + .../loongarch/vector/lsx/lsx-vexth-1.c| 342 ++ .../loongarch/vector/lsx/lsx-vexth-2.c| 182 + .../loongarch/vector/lsx/lsx-vfcvt-1.c| 398 ++ .../loongarch/vector/lsx/lsx-vfcvt-2.c| 278 + .../loongarch/vector/lsx/lsx-vffint-1.c | 161 + .../loongarch/vector/lsx/lsx-vffint-2.c | 264 + 
.../loongarch/vector/lsx/lsx-vffint-3.c | 102 + .../loongarch/vector/lsx/lsx-vfrint_d.c | 230 + .../loongarch/vector/lsx/lsx-vfrint_s.c | 350 ++ .../loongarch/vector/lsx/lsx-vftint-1.c | 349 ++ .../loongarch/vector/lsx/lsx-vftint-2.c | 695 +++ .../loongarch/vector/lsx/lsx-vftint-3.c | 1028 .../loongarch/vector/lsx/lsx-vftint-4.c | 345 ++ .../loongarch/vector/lsx/lsx-vhaddw-1.c | 488 ++ .../loongarch/vector/lsx/lsx-vhaddw-2.c | 452 ++ .../loongarch/vector/lsx/lsx-vhsubw-1.c | 327 ++ .../loongarch/vector/lsx/lsx-vhsubw-2.c | 353 ++ .../loongarch/vector/lsx/lsx-vldi.c | 61 + .../loongarch/vector/lsx/lsx-vmadd.c | 450 ++ .../loongarch/vector/lsx/lsx-vmaddwev-1.c | 472 ++ .../loongarch/vector/lsx/lsx-vmaddwev-2.c | 383 ++ .../loongarch/vector/lsx/lsx-vmaddwev-3.c | 383 ++ .../loongarch/vector/lsx/lsx-vmaddwod-1.c | 372 ++ .../loongarch/vector/lsx/lsx-vmaddwod-2.c | 438 ++ .../loongarch/vector/lsx/lsx-vmaddwod-3.c | 460 ++ .../loongarch/vector/lsx/lsx-vmax-1.c | 317 ++ .../loongarch/vector/lsx/lsx-vmax-2.c | 362 ++ .../loongarch/vector/lsx/lsx-vmaxi-1.c| 279 + .../loongarch/vector/lsx/lsx-vmaxi-2.c| 223 + .../loongarch/vector/lsx/lsx-vmin-1.c | 434 ++ .../loongarch/vector/lsx/lsx-vmin-2.c | 344 ++ .../loongarch/vector/lsx/lsx-vmini-1.c| 314 + .../loongarch/vector/lsx/lsx-vmini-2.c| 216 + .../loongarch/vector/lsx/lsx-vmskgez.c| 119 + .../loongarch/vector/lsx/lsx-vmskltz.c| 321 ++ .../loongarch/vector/lsx/lsx-vmsknz.c | 104 + .../loongarch/vector/lsx/lsx-vmsub.c | 461 ++ .../loongarch/vector/lsx/lsx-vmuh-1.c | 353 ++ .../loongarch/vector/lsx/lsx-vmuh-2.c | 372 ++ .../loongarch/vector/lsx/lsx-vmul.c | 282 + .../loongarch/vector/lsx/lsx-vmulwev-1.c | 434 ++ .../loongarch/vector/lsx/lsx-vmulwev-2.c | 344 ++ .../loongarch/vector/lsx/lsx-vmulwev-3.c | 245 + .../loongarch/vector/lsx/lsx-vmulwod-1.c | 272 + .../loongarch/vector/lsx/lsx-vmulwod-2.c | 282 + .../loongarch/vector/lsx/lsx-vmulwod-3.c | 308 + .../loongarch/vector/lsx/lsx-vneg.c | 321 ++ 
.../loongarch/vector/lsx/lsx-vsadd-1.c| 335 ++ .../loongarch/vector/lsx/lsx-vsadd-2.c| 345 ++ .../loongarch/vector/lsx/lsx-vsat-1.c | 231 + .../loongarch/vector/lsx/lsx-vsat-2.c | 272 + .../loongarch/vector/lsx/lsx-vsigncov.c | 425 ++ .../loongarch/vector/lsx/lsx-vssub-1.c| 398 ++ .../loongarch/vector/lsx/lsx-vssub-2.c| 408 ++ .../loongarch/vector/lsx/ls
[PATCH v3 2/9] LoongArch: Add testsuite framework for Loongson SX/ASX.
gcc/testsuite/ChangeLog: * gcc.target/loongarch/vector/loongarch-vector.exp: New test. * gcc.target/loongarch/vector/simd_correctness_check.h: New test. --- .../loongarch/vector/loongarch-vector.exp | 42 +++ .../loongarch/vector/simd_correctness_check.h | 54 +++ 2 files changed, 96 insertions(+) create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h diff --git a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp new file mode 100644 index 000..f33bad82cb2 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp @@ -0,0 +1,42 @@ +#Copyright(C) 2021 - 2023 Free Software Foundation, Inc. + +#This program is free software; you can redistribute it and / or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation; either version 3 of the License, or +#(at your option) any later version. +# +#This program is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the +#GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +#along with GCC; see the file COPYING3.If not see +# . + +#GCC testsuite that uses the `dg.exp' driver. + +#Exit immediately if this isn't a LoongArch target. +if ![istarget loongarch*-*-*] then { +return +} + +#Load support procs. +load_lib gcc-dg.exp + +#If a testcase doesn't have special options, use these. +global DEFAULT_CFLAGS +if ![info exists DEFAULT_CFLAGS] then { +set DEFAULT_CFLAGS " -mlasx" +} + +#Initialize `dg'. +dg-init + +#Main loop. 
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lsx/*.\[cS\]]] \ + "" $DEFAULT_CFLAGS +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lasx/*.\[cS\]]] \ + "" $DEFAULT_CFLAGS +# All done. +dg-finish diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h new file mode 100644 index 000..eb7fbd59cc7 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h @@ -0,0 +1,54 @@ +#include +#include +#include + +#define ASSERTEQ_64(line, ref, res) \ + do \ +{ \ + int fail = 0; \ + for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \ +{ \ + long *temp_ref = &ref[i], *temp_res = &res[i]; \ + if (abs (*temp_ref - *temp_res) > 0)\ +{ \ + printf (" error: %s at line %ld , expected " #ref \ + "[%ld]:0x%lx, got: 0x%lx\n",\ + __FILE__, line, i, *temp_ref, *temp_res); \ + fail = 1; \ +} \ +} \ + if (fail == 1) \ +abort (); \ +} \ + while (0) + +#define ASSERTEQ_32(line, ref, res) \ + do \ +{ \ + int fail = 0; \ + for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \ +{ \ + int *temp_ref = &ref[i], *temp_res = &res[i]; \ + if (abs (*temp_ref - *temp_res) > 0)\ +{ \ + printf (" error: %s at line %ld , expected " #ref \ + "[%ld]:0x%x, got: 0x%x\n", \ + __FILE__, line, i, *temp_ref, *temp_res); \ + fail = 1; \ +
[PATCH v3 1/9] LoongArch: Add tests of -mstrict-align option.
gcc/testsuite/ChangeLog: * gcc.target/loongarch/strict-align.c: New test. --- gcc/testsuite/gcc.target/loongarch/strict-align.c | 12 1 file changed, 12 insertions(+) create mode 100644 gcc/testsuite/gcc.target/loongarch/strict-align.c diff --git a/gcc/testsuite/gcc.target/loongarch/strict-align.c b/gcc/testsuite/gcc.target/loongarch/strict-align.c new file mode 100644 index 000..040d849584b --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/strict-align.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mstrict-align -mlasx" } */ +/* { dg-final { scan-assembler-not "vfadd.s" } } */ + +void +foo (float *restrict x, float *restrict y) +{ + x[0] = x[0] + y[0]; + x[1] = x[1] + y[1]; + x[2] = x[2] + y[2]; + x[3] = x[3] + y[3]; +} -- 2.20.1
[PATCH v3 3/9] LoongArch: Add tests for Loongson SX builtin functions.
gcc/testsuite/ChangeLog: * gcc.target/loongarch/vector/lsx/lsx-builtin.c: New test. --- .../loongarch/vector/lsx/lsx-builtin.c| 5038 + 1 file changed, 5038 insertions(+) create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c new file mode 100644 index 000..dcc8f9211bd --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-builtin.c @@ -0,0 +1,5038 @@ +/* Test builtins for LOONGARCH LSX ASE instructions */ +/* { dg-do compile } */ +/* { dg-options "-mlsx" } */ +/* { dg-final { scan-assembler-times "lsx_vsll_b:.*vsll\\.b.*lsx_vsll_b" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vsll_h:.*vsll\\.h.*lsx_vsll_h" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vsll_w:.*vsll\\.w.*lsx_vsll_w" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vsll_d:.*vsll\\.d.*lsx_vsll_d" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vslli_b:.*vslli\\.b.*lsx_vslli_b" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vslli_h:.*vslli\\.h.*lsx_vslli_h" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vslli_w:.*vslli\\.w.*lsx_vslli_w" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vslli_d:.*vslli\\.d.*lsx_vslli_d" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsra_b:.*vsra\\.b.*lsx_vsra_b" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vsra_h:.*vsra\\.h.*lsx_vsra_h" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vsra_w:.*vsra\\.w.*lsx_vsra_w" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vsra_d:.*vsra\\.d.*lsx_vsra_d" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vsrai_b:.*vsrai\\.b.*lsx_vsrai_b" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrai_h:.*vsrai\\.h.*lsx_vsrai_h" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrai_w:.*vsrai\\.w.*lsx_vsrai_w" 1 } + * } */ +/* { dg-final { scan-assembler-times 
"lsx_vsrai_d:.*vsrai\\.d.*lsx_vsrai_d" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrar_b:.*vsrar\\.b.*lsx_vsrar_b" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrar_h:.*vsrar\\.h.*lsx_vsrar_h" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrar_w:.*vsrar\\.w.*lsx_vsrar_w" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrar_d:.*vsrar\\.d.*lsx_vsrar_d" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrari_b:.*vsrari\\.b.*lsx_vsrari_b" + * 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vsrari_h:.*vsrari\\.h.*lsx_vsrari_h" + * 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vsrari_w:.*vsrari\\.w.*lsx_vsrari_w" + * 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vsrari_d:.*vsrari\\.d.*lsx_vsrari_d" + * 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vsrl_b:.*vsrl\\.b.*lsx_vsrl_b" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vsrl_h:.*vsrl\\.h.*lsx_vsrl_h" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vsrl_w:.*vsrl\\.w.*lsx_vsrl_w" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vsrl_d:.*vsrl\\.d.*lsx_vsrl_d" 1 } } + */ +/* { dg-final { scan-assembler-times "lsx_vsrli_b:.*vsrli\\.b.*lsx_vsrli_b" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrli_h:.*vsrli\\.h.*lsx_vsrli_h" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrli_w:.*vsrli\\.w.*lsx_vsrli_w" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrli_d:.*vsrli\\.d.*lsx_vsrli_d" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrlr_b:.*vsrlr\\.b.*lsx_vsrlr_b" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrlr_h:.*vsrlr\\.h.*lsx_vsrlr_h" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrlr_w:.*vsrlr\\.w.*lsx_vsrlr_w" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrlr_d:.*vsrlr\\.d.*lsx_vsrlr_d" 1 } + * } */ +/* { dg-final { scan-assembler-times "lsx_vsrlri_b:.*vsrlri\\.b.*lsx_vsrlri_b" + * 1 } } */ +/* { dg-final { scan-assembler-times 
"lsx_vsrlri_h:.*vsrlri\\.h.*lsx_vsrlri_h" + * 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vsrlri_w:.*vsrlri\\.w.*lsx_vsrlri_w" + * 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vsrlri_d:.*vsrlri\\.d.*lsx_vsrlri_d" + * 1 } } */ +/* { dg-final { scan-assembler-times + * "lsx_vbitclr_b:.*vbitclr\\.b.*lsx_vbitclr_b" 1 } } */ +/* { dg-final { scan-assembler-times + * "lsx_vbitclr_h:.*vbitclr\\.h.*lsx_vbitclr_h" 1 } } */ +/* { dg-final { scan-assembler-times + * "lsx_vbitclr_w:.*vbitclr\\.w.*lsx_vbitclr_w" 1 } } */ +/* { dg-final { scan-assembler-times + * "lsx_vbitclr_d:.*vbitclr\\.d.*lsx_vbitclr_d" 1 } } */ +/* { dg-final { scan-assembler-times + * "lsx_vbitclri_b:.*vbitclri\\.b.*lsx_vbitclri_b" 1 } } */ +/* { dg-final { scan-assembler-times + * "lsx_vbitclri_h:.*vbitclri\\.h.*lsx_vbitclri_h" 1 } } */ +/* { dg-final { scan-assembler-times + * "lsx_vbitclri_w:.*vbitclri\\.w.*lsx_vbitclri_w" 1 } } */ +/* { dg-final { scan-assembler-times + * "lsx_vbitclri_d:.*vbitclri\\
[PATCH v3 7/9] LoongArch:Add vector arithmetic addition vsadd instruction.
gcc/testsuite/ChangeLog: * gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c: New test. * gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c: New test. --- .../loongarch/vector/lsx/lsx-vsadd-1.c| 335 + .../loongarch/vector/lsx/lsx-vsadd-2.c| 345 ++ 2 files changed, 680 insertions(+) create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c new file mode 100644 index 000..1bc27c983bb --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c @@ -0,0 +1,335 @@ +/* { dg-do run } */ +/* { dg-options "-mlsx -w -fno-strict-aliasing" } */ +#include "../simd_correctness_check.h" +#include + +int +main () +{ + __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result; + __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result; + __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result; + + int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail; + long int long_op0, long_op1, long_op2, lont_out, lont_result; + long int long_int_out, long_int_result; + unsigned int unsigned_int_out, unsigned_int_result; + unsigned long int unsigned_long_int_out, unsigned_long_int_result; + + *((unsigned long *)&__m128i_op0[1]) = 0x; + *((unsigned long *)&__m128i_op0[0]) = 0x; + *((unsigned long *)&__m128i_op1[1]) = 0x; + *((unsigned long *)&__m128i_op1[0]) = 0x; + *((unsigned long *)&__m128i_result[1]) = 0x; + *((unsigned long *)&__m128i_result[0]) = 0x; + __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + + *((unsigned long *)&__m128i_op0[1]) = 0x; + *((unsigned long *)&__m128i_op0[0]) = 0x; + *((unsigned long *)&__m128i_op1[1]) = 0x; + *((unsigned long *)&__m128i_op1[0]) = 0x; + *((unsigned long *)&__m128i_result[1]) = 0x; + 
*((unsigned long *)&__m128i_result[0]) = 0xfefefefefefefefe; + __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + + *((unsigned long *)&__m128i_op0[1]) = 0x; + *((unsigned long *)&__m128i_op0[0]) = 0x; + *((unsigned long *)&__m128i_op1[1]) = 0x3c992b2e; + *((unsigned long *)&__m128i_op1[0]) = 0x730f; + *((unsigned long *)&__m128i_result[1]) = 0x3c992b2e; + *((unsigned long *)&__m128i_result[0]) = 0x730f; + __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + + *((unsigned long *)&__m128i_op0[1]) = 0x; + *((unsigned long *)&__m128i_op0[0]) = 0x; + *((unsigned long *)&__m128i_op1[1]) = 0x; + *((unsigned long *)&__m128i_op1[0]) = 0x; + *((unsigned long *)&__m128i_result[1]) = 0x; + *((unsigned long *)&__m128i_result[0]) = 0x; + __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + + *((unsigned long *)&__m128i_op0[1]) = 0x; + *((unsigned long *)&__m128i_op0[0]) = 0x; + *((unsigned long *)&__m128i_op1[1]) = 0x; + *((unsigned long *)&__m128i_op1[0]) = 0x; + *((unsigned long *)&__m128i_result[1]) = 0x; + *((unsigned long *)&__m128i_result[0]) = 0x; + __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + + *((unsigned long *)&__m128i_op0[1]) = 0x7fff7fff; + *((unsigned long *)&__m128i_op0[0]) = 0x; + *((unsigned long *)&__m128i_op1[1]) = 0x; + *((unsigned long *)&__m128i_op1[0]) = 0x2bfd9461; + *((unsigned long *)&__m128i_result[1]) = 0x7fff7fff; + *((unsigned long *)&__m128i_result[0]) = 0x2bfd9461; + __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + + *((unsigned long *)&__m128i_op0[1]) = 0x00d3012acc56f9bb; + *((unsigned long *)&__m128i_op0[0]) = 0x1021; + *((unsigned long *)&__m128i_op1[1]) = 0x; + *((unsigned long *)&__m128i_op1[0]) = 0x; + *((unsigned long 
*)&__m128i_result[1]) = 0x00d3012acc56f9bb; + *((unsigned long *)&__m128i_result[0]) = 0x1021; + __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1); + ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); + + *((unsigned long *)&__m128i_op0[1]) = 0x1000; + *((unsigned long *)&__m128i_op0[0]) = 0x0
Re: [PATCH v3 4/9] LoongArch:Added support for SX vector floating-point instructions.
The subject should be "Add tests for SX vector floating-point instructions". The "support" has already been added. Likewise for patches 5-9. -- Xi Ruoyao School of Aerospace Science and Technology, Xidian University
Re: [PATCH] s390: Fix builtins vec_rli and verll
On Mon, Aug 28, 2023 at 11:33:37AM +0200, Andreas Krebbel wrote: > Hi Stefan, > > do you really need to introduce a new flag for U64 given that the type of the > builtin is unsigned long? In function s390_const_operand_ok the immediate is checked whether it is valid w.r.t. the flag: tree_to_uhwi (arg) > ((HOST_WIDE_INT_1U << (bitwidth - 1) << 1) - 1) Here bitwidth is derived from the flag. Cheers, Stefan > > Andreas > > On 8/21/23 17:56, Stefan Schulze Frielinghaus wrote: > > The second argument of these builtins is an unsigned immediate. For > > vec_rli the API allows immediates up to 64 bits whereas the instruction > > verll only allows immediates up to 32 bits. Since the shift count > > equals the immediate modulo vector element size, truncating those > > immediates is fine. > > > > Bootstrapped and regtested on s390. Ok for mainline? > > > > gcc/ChangeLog: > > > > * config/s390/s390-builtins.def (O_U64): New. > > (O1_U64): Ditto. > > (O2_U64): Ditto. > > (O3_U64): Ditto. > > (O4_U64): Ditto. > > (O_M12): Change bit position. > > (O_S2): Ditto. > > (O_S3): Ditto. > > (O_S4): Ditto. > > (O_S5): Ditto. > > (O_S8): Ditto. > > (O_S12): Ditto. > > (O_S16): Ditto. > > (O_S32): Ditto. > > (O_ELEM): Ditto. > > (O_LIT): Ditto. > > (OB_DEF_VAR): Add operand constraints. > > (B_DEF): Ditto. > > * config/s390/s390.cc (s390_const_operand_ok): Honour 64 bit > > operands. 
> > --- > > gcc/config/s390/s390-builtins.def | 60 ++- > > gcc/config/s390/s390.cc | 6 ++-- > > 2 files changed, 39 insertions(+), 27 deletions(-) > > > > diff --git a/gcc/config/s390/s390-builtins.def > > b/gcc/config/s390/s390-builtins.def > > index a16983b18bd..c829f445a11 100644 > > --- a/gcc/config/s390/s390-builtins.def > > +++ b/gcc/config/s390/s390-builtins.def > > @@ -28,6 +28,7 @@ > > #undef O_U12 > > #undef O_U16 > > #undef O_U32 > > +#undef O_U64 > > > > #undef O_M12 > > > > @@ -88,6 +89,11 @@ > > #undef O3_U32 > > #undef O4_U32 > > > > +#undef O1_U64 > > +#undef O2_U64 > > +#undef O3_U64 > > +#undef O4_U64 > > + > > #undef O1_M12 > > #undef O2_M12 > > #undef O3_M12 > > @@ -157,20 +163,21 @@ > > #define O_U127 /* unsigned 16 bit literal */ > > #define O_U168 /* unsigned 16 bit literal */ > > #define O_U329 /* unsigned 32 bit literal */ > > +#define O_U64 10 /* unsigned 64 bit literal */ > > > > -#define O_M12 10 /* matches bitmask of 12 */ > > +#define O_M12 11 /* matches bitmask of 12 */ > > > > -#define O_S211 /* signed 2 bit literal */ > > -#define O_S312 /* signed 3 bit literal */ > > -#define O_S413 /* signed 4 bit literal */ > > -#define O_S514 /* signed 5 bit literal */ > > -#define O_S815 /* signed 8 bit literal */ > > -#define O_S12 16 /* signed 12 bit literal */ > > -#define O_S16 17 /* signed 16 bit literal */ > > -#define O_S32 18 /* signed 32 bit literal */ > > +#define O_S212 /* signed 2 bit literal */ > > +#define O_S313 /* signed 3 bit literal */ > > +#define O_S414 /* signed 4 bit literal */ > > +#define O_S515 /* signed 5 bit literal */ > > +#define O_S816 /* signed 8 bit literal */ > > +#define O_S12 17 /* signed 12 bit literal */ > > +#define O_S16 18 /* signed 16 bit literal */ > > +#define O_S32 19 /* signed 32 bit literal */ > > > > -#define O_ELEM 19 /* Element selector requiring modulo arithmetic. */ > > -#define O_LIT 20 /* Operand must be a literal fitting the target type. 
> > */ > > +#define O_ELEM 20 /* Element selector requiring modulo arithmetic. */ > > +#define O_LIT 21 /* Operand must be a literal fitting the target type. > > */ > > > > #define O_SHIFT 5 > > > > @@ -223,6 +230,11 @@ > > #define O3_U32 (O_U32 << (2 * O_SHIFT)) > > #define O4_U32 (O_U32 << (3 * O_SHIFT)) > > > > +#define O1_U64 O_U64 > > +#define O2_U64 (O_U64 << O_SHIFT) > > +#define O3_U64 (O_U64 << (2 * O_SHIFT)) > > +#define O4_U64 (O_U64 << (3 * O_SHIFT)) > > + > > #define O1_M12 O_M12 > > #define O2_M12 (O_M12 << O_SHIFT) > > #define O3_M12 (O_M12 << (2 * O_SHIFT)) > > @@ -1989,19 +2001,19 @@ B_DEF (s390_verllvf, vrotlv4si3, > > 0, > > B_DEF (s390_verllvg, vrotlv2di3, 0, > > B_VX, 0, BT_FN_UV2DI_UV2DI_UV2DI) > > > > OB_DEF (s390_vec_rli, s390_vec_rli_u8, > > s390_vec_rli_s64, B_VX, BT_FN_OV4SI_OV4SI_ULONG) > > -OB_DEF_VAR (s390_vec_rli_u8,s390_verllb,0, > > 0, BT_OV_UV16QI_UV16QI_ULONG) > > -OB_DEF_VAR (s390_vec_rli_s8,s390_verllb,0, > > 0, BT_OV_V16QI_V16QI_ULONG) > > -OB_DEF_VAR (s390_vec_rli_u16, s390_verllh,0, > > 0, BT_OV_UV8HI_UV8HI_ULONG) > > -OB_DEF_VAR (s390_ve