Re: libgo patch committed: Add S/390 support to internal/cpu package
On Fri, Feb 15, 2019 at 08:59:29PM +0100, Matthias Klose wrote: > On 15.02.19 15:52, Ian Lance Taylor wrote: > > This patch by Robin Dapp adds S/390 support to the internal/cpu > > package. This partially addresses PR 89123. I bootstrapped it on > > x86_64-pc-linux-gnu, which means little. Committed to mainline. > > fails in the -m31 multilib variant with Indeed. Given that there is just libgo/go/internal/cpu/cpu_s390x.go libgo/go/internal/cpu/cpu_s390x_test.go (note, no s390), I think the easiest fix is: --- libgo/go/internal/cpu/cpu_gccgo.c.jj2019-02-16 07:57:27.882179972 +0100 +++ libgo/go/internal/cpu/cpu_gccgo.c 2019-02-16 08:36:37.241900882 +0100 @@ -71,7 +71,7 @@ struct xgetbv_ret xgetbv(void) { #endif /* defined(__i386__) || defined(__x86_64__) */ -#ifdef __s390__ +#ifdef __s390x__ struct facilityList { uint64_t bits[4]; @@ -184,4 +184,4 @@ struct queryResult klmdQuery() { return ret; } -#endif /* defined(__s390__) */ +#endif /* defined(__s390x__) */ If cpu_s390.go is ever added, this can be changed again and there can be say #ifdef __s390x__ #define LHI "lghi" #else #define LHI "lhi" #endif and replace "lghi ... in the inline asm with LHI "... Jakub
[PATCH] Decrease {i386,sse}.md global state by 12KB
Hi! This is something I've noticed in a s390 change I'll post soon (where it was even completely unnecessary), but it applies to i386 backend too. Seems we have lots of .bss global state, 66x 64-byte and 61x 128-byte long static buffers. Instead of doing static char buf[128]; ... s{,n}printf (buf, ...); ... return buf; in the insn templates we can do: char buf[128]; ... s{,n}printf (buf, ...); ... output_asm_insn (buf, operands); return ""; and avoid that way the global state. The only problem with that is that final.c does something in between: 1) if return from the template is NULL, not this case 2) if return from the template is "#", not this case 3) if (targetm.asm_out.unwind_emit_before_insn && targetm.asm_out.unwind_emit) targetm.asm_out.unwind_emit (asm_out_file, insn); while cygming.h has #define TARGET_ASM_UNWIND_EMIT i386_pe_seh_unwind_emit #define TARGET_ASM_UNWIND_EMIT_BEFORE_INSN false it is ok too (and other i386 subtargets don't do either, so unwind_emit_before_insn is true (the default) and unwind_emit NULL) 4) rtx_call_insn *call_insn = dyn_cast <rtx_call_insn *> (insn); if (call_insn != NULL) that is for calls only, the patch doesn't change any calls Those 4 spots are in between get_insn_template and output_asm_insn (templ, recog_data.operand); which starts with: /* An insn may return a null string template in a case where no assembler code is needed. */ if (*templ == 0) return; so I think the patch doesn't make it more costly, there is just one output_asm_insn extra call and the old one will return immediately. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-02-16 Jakub Jelinek * config/i386/i386.md (*movqi_internal): Remove static from buf variable. Use output_asm_insn (buf, operands); return ""; instead of return buf;. * config/i386/sse.md (_andnot3, *3, *andnot3, *andnottf3, *3, *tf3, 3): Likewise. 
--- gcc/config/i386/i386.md.jj 2019-02-12 21:48:53.183072497 +0100 +++ gcc/config/i386/i386.md 2019-02-15 23:25:36.198589133 +0100 @@ -2531,7 +2531,7 @@ (define_insn "*movqi_internal" "Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m,C,BC"))] "!(MEM_P (operands[0]) && MEM_P (operands[1]))" { - static char buf[128]; + char buf[128]; const char *ops; const char *suffix; @@ -2564,7 +2564,8 @@ (define_insn "*movqi_internal" suffix = (get_attr_mode (insn) == MODE_HI) ? "w" : "b"; snprintf (buf, sizeof (buf), ops, suffix); - return buf; + output_asm_insn (buf, operands); + return ""; case TYPE_MSKLOG: if (operands[1] == const0_rtx) --- gcc/config/i386/sse.md.jj 2019-02-14 08:06:39.446519415 +0100 +++ gcc/config/i386/sse.md 2019-02-15 23:28:54.305366640 +0100 @@ -3198,7 +3198,7 @@ (define_insn "_andnot3" { - static char buf[128]; + char buf[128]; const char *ops; const char *suffix; @@ -3233,7 +3233,8 @@ (define_insn "_andnot3_andnot3_andnot3|%%0, %%1, %%2}", ops, suffix); - return buf; + output_asm_insn (buf, operands); + return ""; } [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -3314,7 +3316,7 @@ (define_insn "*3" "TARGET_SSE && && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { - static char buf[128]; + char buf[128]; const char *ops; const char *suffix; @@ -3349,7 +3351,8 @@ (define_insn "*3" } snprintf (buf, sizeof (buf), ops, suffix); - return buf; + output_asm_insn (buf, operands); + return ""; } [(set_attr "isa" "noavx,avx,avx512dq,avx512f") (set_attr "type" "sselog") @@ -3378,7 +3381,7 @@ (define_insn "*3" (match_operand:VF_512 2 "nonimmediate_operand" "vm")))] "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { - static char buf[128]; + char buf[128]; const char *ops; const char *suffix; @@ -3395,7 +3398,8 @@ (define_insn "*3" snprintf (buf, sizeof (buf), "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}", ops, suffix); - return buf; + output_asm_insn (buf, operands); + return ""; } [(set_attr "type" "sselog") (set_attr "prefix" "evex") @@ -3449,7 +3453,7 @@ 
(define_insn "*andnot3" (match_operand:MODEF 2 "register_operand" "x,x,v,v")))] "SSE_FLOAT_MODE_P (mode)" { - static char buf[128]; + char buf[128]; const char *ops; const char *suffix = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : ""; @@ -3485,7 +3489,8 @@ (define_insn "*andnot3" } snprintf (buf, sizeof (buf), ops, suffix); - return buf; + output_asm_insn (buf, operands); + return ""; } [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") @@ -3516,7 +3521,7 @@ (define_insn "*andnottf3" (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))] "TARGET_SSE" { - static char buf[128]; + char buf[128]; const char *ops; const char *tmp =
[PATCH] Improve mem = STRING_CST expansion (PR rtl-optimization/66152)
Hi! On the following testcase, we've regressed in bar since 8.x, in 8.x store merging came up with mem = 64-bit constant, but starting with the change to transform {0,1,2,3,4,5,6,7} char initializers into STRING_CSTs, we don't do that anymore. The mem = STRING_CST expansion can do that, but only if there are no embedded zeros. The following patch improves it even for embedded zeros, by using a new callback for the can_store_by_pieces/store_by_pieces calls which knows how to handle STRING_CST. We don't need strlen in that case, can use TREE_STRING_LENGTH instead. Additionally, if the STRING_CST is slightly shorter than the destination region, it might generate better code by trying to store_by_pieces it all in one go (bytes from STRING_CST until the last one, followed by artificially added zeros) and only if that doesn't seem to be beneficial (e.g. very small STRING_CST followed by kilobytes of zeros) goes for the store_by_pieces of STRING_CST (rounded up to next STORE_MAX_PIECES) followed by a clear_storage. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-02-16 Jakub Jelinek PR rtl-optimization/66152 * builtins.h (c_readstr): Declare. * builtins.c (c_readstr): Remove forward declaration. Add null_terminated_p argument, if false, read all bytes from the string instead of stopping after '\0'. * expr.c (string_cst_read_str): New function. (store_expr): Use string_cst_read_str instead of builtin_strncpy_read_str. Try to store by pieces the whole exp_len first, and only if that fails, split it up into store by pieces followed by clear_storage. Formatting fix. * gcc.target/i386/pr66152.c: New test. 
--- gcc/builtins.h.jj 2019-02-14 08:06:37.878546571 +0100 +++ gcc/builtins.h 2019-02-15 11:33:50.208180171 +0100 @@ -103,6 +103,7 @@ struct c_strlen_data }; extern tree c_strlen (tree, int, c_strlen_data * = NULL, unsigned = 1); +extern rtx c_readstr (const char *, scalar_int_mode, bool = true); extern void expand_builtin_setjmp_setup (rtx, rtx); extern void expand_builtin_setjmp_receiver (rtx); extern void expand_builtin_update_setjmp_buf (rtx); --- gcc/builtins.c.jj 2019-02-11 20:58:48.509965578 +0100 +++ gcc/builtins.c 2019-02-15 11:37:00.046029652 +0100 @@ -95,7 +95,6 @@ builtin_info_type builtin_info[(int)END_ /* Non-zero if __builtin_constant_p should be folded right away. */ bool force_folding_builtin_constant_p; -static rtx c_readstr (const char *, scalar_int_mode); static int target_char_cast (tree, char *); static rtx get_memory_rtx (tree, tree); static int apply_args_size (void); @@ -802,10 +801,14 @@ c_strlen (tree src, int only_value, c_st } /* Return a constant integer corresponding to target reading - GET_MODE_BITSIZE (MODE) bits from string constant STR. */ - -static rtx -c_readstr (const char *str, scalar_int_mode mode) + GET_MODE_BITSIZE (MODE) bits from string constant STR. If + NULL_TERMINATED_P, reading stops after '\0' character, all further ones + are assumed to be zero, otherwise it reads as many characters + as needed. 
*/ + +rtx +c_readstr (const char *str, scalar_int_mode mode, + bool null_terminated_p/*=true*/) { HOST_WIDE_INT ch; unsigned int i, j; @@ -830,7 +833,7 @@ c_readstr (const char *str, scalar_int_m j = j + UNITS_PER_WORD - 2 * (j % UNITS_PER_WORD) - 1; j *= BITS_PER_UNIT; - if (ch) + if (ch || !null_terminated_p) ch = (unsigned char) str[i]; tmp[j / HOST_BITS_PER_WIDE_INT] |= ch << (j % HOST_BITS_PER_WIDE_INT); } --- gcc/expr.c.jj 2019-02-08 20:00:40.309835608 +0100 +++ gcc/expr.c 2019-02-15 11:37:18.715719809 +0100 @@ -5453,6 +5453,30 @@ emit_storent_insn (rtx to, rtx from) return maybe_expand_insn (code, 2, ops); } +/* Helper function for store_expr storing of STRING_CST. */ + +static rtx +string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode) +{ + tree str = (tree) data; + + gcc_assert (offset >= 0); + if (offset >= TREE_STRING_LENGTH (str)) +return const0_rtx; + + if ((unsigned HOST_WIDE_INT) offset + GET_MODE_SIZE (mode) + > (unsigned HOST_WIDE_INT) TREE_STRING_LENGTH (str)) +{ + char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode)); + size_t l = TREE_STRING_LENGTH (str) - offset; + memcpy (p, TREE_STRING_POINTER (str) + offset, l); + memset (p + l, '\0', GET_MODE_SIZE (mode) - l); + return c_readstr (p, mode, false); +} + + return c_readstr (TREE_STRING_POINTER (str) + offset, mode, false); +} + /* Generate code for computing expression EXP, and storing the value into TARGET. @@ -5472,7 +5496,7 @@ emit_storent_insn (rtx to, rtx from) rtx store_expr (tree exp, rtx target, int call_param_p, - bool nontemporal, bool reverse) + bool nontemporal, bool reverse) { rtx temp; rtx alt_rtl = NULL_RTX; @@ -5606,36 +5630,32 @@
[PATCH] Teach evrp that main's argc argument is always non-negative for C family (PR tree-optimization/89350)
Hi! Both the C and C++ standard guarantee that the argc argument to main is non-negative, the following patch sets (or adjusts) the corresponding SSA_NAME_RANGE_INFO. While main is just one, with IPA VRP it can also propagate etc. I had to change one testcase because it started optimizing it better (the test has been folded away), so no sinking was done. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-02-16 Jakub Jelinek PR tree-optimization/89350 * gimple-ssa-evrp.c: Include tree-dfa.h and langhooks.h. (maybe_set_main_argc_range): New function. (execute_early_vrp): Call it. * gcc.dg/tree-ssa/vrp122.c: New test. * gcc.dg/tree-ssa/ssa-sink-3.c (main): Rename to ... (bar): ... this. --- gcc/gimple-ssa-evrp.c.jj2019-01-01 12:37:15.712998659 +0100 +++ gcc/gimple-ssa-evrp.c 2019-02-15 09:49:56.768534668 +0100 @@ -41,6 +41,8 @@ along with GCC; see the file COPYING3. #include "tree-cfgcleanup.h" #include "vr-values.h" #include "gimple-ssa-evrp-analyze.h" +#include "tree-dfa.h" +#include "langhooks.h" class evrp_folder : public substitute_and_fold_engine { @@ -291,6 +293,39 @@ evrp_dom_walker::cleanup (void) evrp_folder.vr_values->cleanup_edges_and_switches (); } +/* argc in main in C/C++ is guaranteed to be non-negative. Adjust the + range info for it. 
*/ + +static void +maybe_set_main_argc_range (void) +{ + if (!DECL_ARGUMENTS (current_function_decl) + || !(lang_GNU_C () || lang_GNU_CXX () || lang_GNU_OBJC ())) +return; + + tree argc = DECL_ARGUMENTS (current_function_decl); + if (TYPE_MAIN_VARIANT (TREE_TYPE (argc)) != integer_type_node) +return; + + argc = ssa_default_def (cfun, argc); + if (argc == NULL_TREE) +return; + + wide_int min, max; + value_range_kind kind = get_range_info (argc, , ); + if (kind == VR_VARYING) +{ + min = wi::zero (TYPE_PRECISION (integer_type_node)); + max = wi::to_wide (TYPE_MAX_VALUE (integer_type_node)); +} + else if (kind == VR_RANGE && wi::neg_p (min) && !wi::neg_p (max)) +min = wi::zero (TYPE_PRECISION (integer_type_node)); + else +return; + + set_range_info (argc, VR_RANGE, min, max); +} + /* Main entry point for the early vrp pass which is a simplified non-iterative version of vrp where basic blocks are visited in dominance order. Value ranges discovered in early vrp will also be used by ipa-vrp. */ @@ -307,6 +342,10 @@ execute_early_vrp () scev_initialize (); calculate_dominance_info (CDI_DOMINATORS); + /* argc in main in C/C++ is guaranteed to be non-negative. */ + if (MAIN_NAME_P (DECL_NAME (current_function_decl))) +maybe_set_main_argc_range (); + /* Walk stmts in dominance order and propagate VRP. 
*/ evrp_dom_walker walker; walker.walk (ENTRY_BLOCK_PTR_FOR_FN (cfun)); --- gcc/testsuite/gcc.dg/tree-ssa/vrp122.c.jj 2019-02-15 09:54:07.016357759 +0100 +++ gcc/testsuite/gcc.dg/tree-ssa/vrp122.c 2019-02-15 09:53:59.299486561 +0100 @@ -0,0 +1,14 @@ +/* PR tree-optimization/89350 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-not "link_error \\\(" "optimized" } } */ + +extern void link_error (void); + +int +main (int argc, const char *argv[]) +{ + if (argc < 0) +link_error (); + return 0; +} --- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c.jj 2015-05-29 15:03:44.947546711 +0200 +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c 2019-02-16 08:04:29.951126611 +0100 @@ -2,7 +2,7 @@ /* { dg-options "-O2 -fdump-tree-sink-stats" } */ extern void foo(int a); int -main (int argc) +bar (int argc) { int a; a = argc + 1; Jakub
Re: C++ PATCH for c++/89217 - ICE with list-initialization in range-based for loop
On 2/11/19 6:03 PM, Marek Polacek wrote: On Mon, Feb 11, 2019 at 01:43:36PM -0500, Jason Merrill wrote: On 2/7/19 6:02 PM, Marek Polacek wrote: Since r268321 we can call digest_init even in a template, when the compound literal isn't instantiation-dependent. Right. And since digest_init modifies the CONSTRUCTOR in place, that means the template trees are digested rather than the original parse trees that we try to use. If we're going to use digest_init, we should probably save another CONSTRUCTOR with the original trees. I tried unsharing the constructor and even its contents but only then did I realize that this cannot work. Why wouldn't going back to saving {*((struct S *) this)->r} work? It's not digest_init that adds the problematic INDIRECT_REF via convert_from_reference, it's instantiate_pending_templates -> tsubst_expr -> ... -> finish_non_static_data_member. So the problem isn't sharing the contents of the CONSTRUCTOR, but rather what finish_non_static_data_member does with the {.r=(struct R &) (struct R *) ((struct S *) this)->r} expression. The same problem would appear even before r268321 changes if we called tsubst_* twice on the CONSTRUCTOR above. Yes, it sounds like there's a bug in that path as well. Perhaps tsubst_copy_and_build/COMPONENT_REF should strip a REFERENCE_REF_P if t was already a reference. Do you still think digest_init and/or finish_compound_literal need tweaking? I imagine that saving post-digest trees might cause other problems, but perhaps not. Perhaps we ought to move away more generally from trying to save the original parse trees for non-dependent expressions and messing with NON_DEPENDENT_EXPR. Jason
Re: [PATCH] v2: Fix excess warnings from -Wtype-limits with location wrappers (PR c++/88680)
On 2/14/19 4:20 PM, David Malcolm wrote: On Thu, 2019-02-14 at 17:32 +0100, Jakub Jelinek wrote: On Thu, Feb 14, 2019 at 11:26:15AM -0500, David Malcolm wrote: There's an asymmetry in the warning; it's looking for a comparison of a LHS expression against an RHS constant 0, spelled as "0". If we fold_for_warn on the RHS, then that folding introduces a warning for expressions that aren't spelled as "0" but can be folded to 0, e.g., with: enum { FOO, BAR }; So, shouldn't it be made symmetric? Check if one argument is literal 0 before folding, and only if it is, fold_for_warn the other argument? Jakub The reference to symmetry in my earlier email was somewhat misleading, sorry. The test happens after a canonicalization of the ordering happens here, near the top of shorten_compare: /* If first arg is constant, swap the args (changing operation so value is preserved), for canonicalization. Don't do this if the second arg is 0. */ so this already gives us symmetry. Here's an updated version of the patch which adds the fold_for_warn in a slightly later place, and adds a comment, and some more test cases. Successfully bootstrapped & regtested on x86_64-pc-linux-gnu. OK for trunk? OK. Jason
Re: [C++ PATCH] preview: Fix braces around scalar initializer (C++/88572)
On 2/14/19 7:09 PM, will wray wrote: Thanks Jason. Adding this 'else if' condition afterwards seems to work: else if (BRACE_ENCLOSED_INITIALIZER_P (CONSTRUCTOR_ELT (stripped_init,0)->value)) { if (complain & tf_error) error ("too many braces around scalar initializer for type %qT", type); init = error_mark_node; } I'll regtest that and run through the rest of the reshape logic again. I think the first_initializer_p check should be part of this condition rather than the C++98 condition. What do you think about the fact that this patch now rejects empty brace inits like int{{}} that was previously accepted? It's a breaking change for any code that was incorrectly doing that. The change makes sense to me; I would hope that such code is rare. Jason On Thu, Feb 14, 2019 at 6:02 PM Jason Merrill wrote: On 2/12/19 6:04 PM, will wray wrote: A proposed patch for Bug 88572 is attached to the bug report along with a short description and Change Log (a link there gives a pretty diff of the patch): https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88572#c15 I'd appreciate any review of this patch, as well as testing on more platforms. The patch with updated tests passes for me on x86_64. There's also test code in bug comment #1 that demonstrates SFINAE based on the nesting of braces. It could also be added to the testsuite - I'm not sure how to do that or if it is needed. + if (cxx_dialect < cxx11 || first_initializer_p) I would expect this to miss the error in struct A { int i; } a = {{{42}}}; I see that we end up complaining about this in convert_like_real because implicit_conversion catches the problem here, but I think we ought to catch it in reshape_init_r as well. So, also complain if the element of the CONSTRUCTOR is also BRACE_ENCLOSED_INITIALIZER_P. Jason
Re: [PATCH] document __builtin_is_constant_evaluated
On 2/13/19 4:33 PM, Martin Sebor wrote: Index: gcc/doc/extend.texi === --- gcc/doc/extend.texi (revision 268856) +++ gcc/doc/extend.texi (working copy) @@ -12890,6 +12890,22 @@ built-in in this case, because it has no opportuni optimization. @end deftypefn +@deftypefn {Built-in Function} bool __builtin_is_constant_evaluated () +The @code{__builtin_is_constant_evaluated} function is available only +in C++. Its main use case is to determine whether a @code{constexpr} +function is being called in a @code{constexpr} context. A call to +the function evaluates to a core constant expression with the value +@code{true} if and only if it occurs within the evaluation of an expression +or conversion that is manifestly constant-evaluated as defined in the C++ +standard. Manifestly constant-evaluated contexts include constant-expressions, +the conditions of @code{constexpr if} statements, constraint-expresions, and s/expresions/expressions/ +initializers of variables usable in constant expressions. The built-in is +intended to be used by implementations of the @code{std::is_constant_evaluated} +C++ function. Programs should make use of the latter function rather than +invoking the built-in directly. For more details refer to the latest revision +of the C++ standard. +@end deftypefn + @deftypefn {Built-in Function} long __builtin_expect (long @var{exp}, long @var{c}) @opindex fprofile-arcs You may use @code{__builtin_expect} to provide the compiler with I think this is generally reasonable (and I agree with the rationale for documenting this at all), but I'd like to see this rearranged and rephrased to put the most important point (it's an internal hook to implement std::is_constant_evaluated and shouldn't be called directly) before the technical details, with a paragraph break in between. -Sandra
Re: [PATCH] document __has_attribute and __has_include
On 2/13/19 2:46 PM, Martin Sebor wrote: The attached patch adds documentation for the __has_attribute (and __has_cpp_attribute) and __has_include operators added in r215752. Thanks! I was a little unsure where to add this, whether the preprocessor manual or the GCC manual, or both. It seems that it belongs in the preprocessor manual but since more users read the GCC manual, it's likely to be overlooked there. I think the preprocessor manual is the right place. A while back I brought up the idea of consolidating the preprocessor docs into the GCC manual but the consensus seemed to be for retaining a separate preprocessor manual. My comments on this patch are mostly trivial markup things. @@ -3422,6 +3425,99 @@ condition succeeds after the original @samp{#if} a @samp{#else} is allowed after any number of @samp{#elif} directives, but @samp{#elif} may not follow @samp{#else}. +@node __has_attribute +@subsection __has_attribute Please use @code markup in the @subsection. +@cindex @code{__has_attribute} + +The special operator @code{__has_attribute (operand)} may be used in @code{__has_attribute (@var{operand})} +@samp{#if} and @samp{#elif} expressions to test whether the attribute +referenced by its argument is recognized by GCC. Using the operator +in other contexts is not valid. In C code, @var{operand} must be +a valid identifier. In C++ code, @var{operand} may be optionally +introduced by the @code{attribute-scope::} prefix. I think "attribute-scope" is not a literal part of the prefix, so @code{@var{attribute-scope}::} +The @code{attribute-scope} prefix identifies the ``namespace'' within And @var markup here, too. +which the attribute is recognized. The scope of GCC attributes is +@samp{gnu} or @samp{__gnu__}. The operator by itself, without any The @code{__has_attribute} operator by itself +@var{operand} or parentheses, acts as a predefined macro so that support +for it can be tested in portable code. 
Thus, the recommended use of +the operator is as follows: + +@smallexample +#if defined __has_attribute +# if __has_attribute (nonnull) +#define ATTR_NONNULL __attribute__ ((nonnull)) +# endif +#endif +@end smallexample + +The first @samp{#if} test succeeds only when the operator is supported +by the version of GCC (or another compiler) being used. Only when that +test succeeds is it valid to use @code{__has_attribute} as a preprocessor +operator. As a result, combining the two tests into a single expression as +shown below would only be valid with a compiler that supports the operator +but not with others that don't. + +@smallexample +#if defined __has_attribute && __has_attribute (nonnull) /* not portable */ +@dots{} +#endif +@end smallexample + +@node __has_cpp_attribute +@subsection __has_cpp_attribute @code markup in the @subsection title, again. +@cindex @code{__has_cpp_attribute} + +The special operator @code{__has_cpp_attribute (operand)} may be used @var{operand} markup again. +in @samp{#if} and @samp{#elif} expressions in C++ code to test whether +the attribute referenced by its argument is recognized by GCC. +@code{__has_cpp_attribute (operand)} is equivalent to +@code{__has_attribute (operand)} except that when @code{operand} The 3 instances above too. +designates a supported standard attribute it evaluates to an integer +constant of the form @code{MM} indicating the year and month when +the attribute was first introduced into the C++ standard. For additional +information including the dates of the introduction of current standard +attributes, see @w{@uref{https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations/, +SD-6: SG10 Feature Test Recommendations}}. 
+ +@node __has_include +@subsection __has_include @code markup in title again +@cindex @code{__has_include} + > +The special operator @code{__has_include (operand)} may be used in @samp{#if} @var{operand} +and @samp{#elif} expressions to test whether the header referenced by its +@var{operand} can be included using the @samp{#include} directive. Using +the operator in other contexts is not valid. The @var{operand} takes +the same form as the file in the @samp{#include} directive (@xref{Include +Syntax}) and evaluates to a nonzero value if the header can be included and +to zero otherwise. Note that the ability to include a header doesn't +imply that the header doesn't contain invalid constructs or @samp{#error} +directives that would cause the preprocessor to fail. + +The @code{__has_include} operator by itself, without any @var{operand} or +parentheses, acts as a predefined macro so that support for it can be tested +in portable code. Thus, the recommended use of the operator is as follows: + +@smallexample +#if defined __has_include +# if __has_include () +#include +# endif +#endif +@end smallexample + +The first @samp{#if} test succeeds only when the operator is supported +by the version of GCC (or another
Re: [PATCH doc] correct/expand -Wreturn-type
On 2/6/19 11:15 AM, Martin Sebor wrote: [snip] But whatever. Attached is a change with the subsentences reversed. This version of the patch is OK. -Sandra
Re: [PATCH doc] correct/improve -Wmissing-attributes and -Wattribute-alias
On 2/6/19 9:16 AM, Martin Sebor wrote: The manual documents the -Wno-missing-attributes form of the option as if it was enabled by default, even though it's enabled by -Wall (I can't get this -Wno- convention straight in my head). I also got private comments on the documentation of the option suggesting to add cross-references, and to list the attributes -Wattribute-alias considers (the same ones as -Wmissing-attributes). The attached patch makes these changes. I found the discussion of both options incomprehensible even with this patch. :-( The defaults are incorrect, there are typos, awkward wording and confusing paragraph organization, etc. So I consulted the sources and came up with the attached alternative patch. Can you review this for correctness and generally making sense? -Sandra 2019-02-15 Sandra Loosemore Martin Sebor gcc/ * c-family/c.opt (Wmissing-attributes): Clean up doc string. * common.opt (Wattribute-alias): Likewise. * doc/invoke.texi (Option Summary): List general form of -Wattribute-alias=. List positive form of -Wmissing-attributes. (-Wmissing-attributes): Invert entry, rewrite and correct default. Add cross-references. (-Wattribute-alias): Rewrite and correct default. Mention considered attributes (same as for -Wmissing-attributes). Index: gcc/c-family/c.opt === --- gcc/c-family/c.opt (revision 268948) +++ gcc/c-family/c.opt (working copy) @@ -818,7 +818,7 @@ Warn on primary template declaration. Wmissing-attributes C ObjC C++ ObjC++ Var(warn_missing_attributes) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) Warn about declarations of entities that may be missing attributes -that related entities have been declared with it. +that related entities have been declared with. 
Wmissing-format-attribute C ObjC C++ ObjC++ Warning Alias(Wsuggest-attribute=format) Index: gcc/common.opt === --- gcc/common.opt (revision 268948) +++ gcc/common.opt (working copy) @@ -552,11 +552,11 @@ Warn about inappropriate attribute usage Wattribute-alias Common Alias(Wattribute_alias=, 1, 0) Warning -Warn about type safety and similar errors and mismatches in attribute alias and related. +Warn about type safety and similar errors and mismatches in declarations with alias attributes. Wattribute-alias= Common Joined RejectNegative UInteger Var(warn_attribute_alias) Init(1) Warning IntegerRange(0, 2) -Warn about type safety and similar errors and mismatches in attribute alias and related. +Warn about type safety and similar errors and mismatches in declarations with alias attributes. Wcannot-profile Common Var(warn_cannot_profile) Init(1) Warning Index: gcc/doc/invoke.texi === --- gcc/doc/invoke.texi (revision 268948) +++ gcc/doc/invoke.texi (working copy) @@ -288,7 +288,7 @@ Objective-C and Objective-C++ Dialects}. -Walloc-zero -Walloc-size-larger-than=@var{byte-size} @gol -Walloca -Walloca-larger-than=@var{byte-size} @gol -Wno-aggressive-loop-optimizations -Warray-bounds -Warray-bounds=@var{n} @gol --Wno-attributes -Wno-attribute-alias @gol +-Wno-attributes -Wattribute-alias=@var{n} @gol -Wbool-compare -Wbool-operation @gol -Wno-builtin-declaration-mismatch @gol -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol @@ -322,7 +322,7 @@ Objective-C and Objective-C++ Dialects}. 
-Winvalid-pch -Wlarger-than=@var{byte-size} @gol -Wlogical-op -Wlogical-not-parentheses -Wlong-long @gol -Wmain -Wmaybe-uninitialized -Wmemset-elt-size -Wmemset-transposed-args @gol --Wmisleading-indentation -Wno-missing-attributes -Wmissing-braces @gol +-Wmisleading-indentation -Wmissing-attributes -Wmissing-braces @gol -Wmissing-field-initializers -Wmissing-format-attribute @gol -Wmissing-include-dirs -Wmissing-noreturn -Wmissing-profile @gol -Wno-multichar -Wmultistatement-macros -Wnonnull -Wnonnull-compare @gol @@ -5056,7 +5056,7 @@ about the layout of the file that the di This warning is enabled by @option{-Wall} in C and C++. -@item -Wno-missing-attributes +@item -Wmissing-attributes @opindex Wmissing-attributes @opindex Wno-missing-attributes Warn when a declaration of a function is missing one or more attributes @@ -5064,10 +5064,10 @@ that a related function is declared with affect the correctness or efficiency of generated code. For example, the warning is issued for declarations of aliases that use attributes to specify less restrictive requirements than those of their targets. -This typically represents a potential optimization oportunity rather -than a hidden bug. The @option{-Wattribute-alias} option controls warnings -issued for mismatches between declarations of aliases and their targets -that might be indicative of code generation bugs. +This typically represents a potential optimization opportunity. +By contrast,
[PATCH 32/42] i386: Emulate MMX pshufb with SSE version
Emulate MMX version of pshufb with SSE version by masking out the bit 3 of the shuffle control byte. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (ssse3_pshufbv8qi3): Changed to define_insn_and_split. Also allow TARGET_MMX_WITH_SSE. Add SSE emulation. --- gcc/config/i386/sse.md | 46 +- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 80b1a46f507..704e211c0b8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15697,17 +15697,45 @@ (set_attr "btver2_decode" "vector") (set_attr "mode" "")]) -(define_insn "ssse3_pshufbv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] -UNSPEC_PSHUFB))] - "TARGET_SSSE3" - "pshufb\t{%2, %0|%0, %2}"; - [(set_attr "type" "sselog1") +(define_insn_and_split "ssse3_pshufbv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")] +UNSPEC_PSHUFB)) + (clobber (match_scratch:V4SI 3 "=X,x,Yv"))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" + "@ + pshufb\t{%2, %0|%0, %2} + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(set (match_dup 3) (match_dup 5)) + (set (match_dup 3) + (and:V4SI (match_dup 3) (match_dup 2))) + (set (match_dup 0) + (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))] +{ + /* Emulate MMX version of pshufb with SSE version by masking out the + bit 3 of the shuffle control byte. 
*/ + operands[0] = lowpart_subreg (V16QImode, operands[0], + GET_MODE (operands[0])); + operands[1] = lowpart_subreg (V16QImode, operands[1], + GET_MODE (operands[1])); + operands[2] = lowpart_subreg (V4SImode, operands[2], + GET_MODE (operands[2])); + operands[4] = lowpart_subreg (V16QImode, operands[3], + GET_MODE (operands[3])); + rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7), +GEN_INT (0xf7f7f7f7), +GEN_INT (0xf7f7f7f7), +GEN_INT (0xf7f7f7f7)); + rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par); + operands[5] = force_const_mem (V4SImode, vec_const); +} + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_insn "_psign3" [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x") -- 2.20.1
Re: [PATCH 00/40] V6: Emulate MMX intrinsics with SSE
On Fri, Feb 15, 2019 at 9:50 AM Uros Bizjak wrote: > > On Fri, Feb 15, 2019 at 2:58 PM H.J. Lu wrote: > > > > On x86-64, since __m64 is returned and passed in XMM registers, we can > > emulate MMX intrinsics with SSE instructions. To support it, we added > > > > #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2) > > > > ;; Define instruction set of MMX instructions > > (define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" > > (const_string "base")) > > > > (eq_attr "mmx_isa" "native") > >(symbol_ref "!TARGET_MMX_WITH_SSE") > > (eq_attr "mmx_isa" "x64") > >(symbol_ref "TARGET_MMX_WITH_SSE") > > (eq_attr "mmx_isa" "x64_avx") > >(symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX") > > (eq_attr "mmx_isa" "x64_noavx") > >(symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX") > > > > We added SSE emulation to MMX patterns and disabled MMX alternatives with > > TARGET_MMX_WITH_SSE. > > > > Most of MMX instructions have equivalent SSE versions and results of some > > SSE versions need to be reshuffled to the right order for MMX. There are > > a couple of tricky cases: > > > > 1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent. We emulate MMX > > maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the > > mask operand and handle unmapped bits 64:127 at memory address by > > adjusting source and mask operands together with memory address. > > > > 2. MMX movntq is emulated with SSE2 DImode movnti, which is available > > in 64-bit mode. > > > > 3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index. > > SSE emulation must clear bit 3 in the shuffle control mask. > > > > 4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve > > the upper 64 bits of destination XMM register. > > > > Tests are also added to check each SSE emulation of MMX intrinsics. > > > > There are no regressions on i686 and x86-64. For x86-64, GCC is also > > tested with > > > > --with-arch=native --with-cpu=native > > > > on AVX2 and AVX512F machines. 
> > I went through the code again, and looks OK in general, modulo > mmx_nonimmediate_operand issue and a couple of minor issues. > > Please substitute nonimmediate_operand predicate with > mmx_nonimmediate_operand in expanders and insn patterns. Please note > that the proposed convention is to name the operand > register_mmxmem_operand (c.f. register_ssemem_operand), so I suggest > we name the predicate in this way. > > There is an issue with a change to emms pattern. > > And let's remove _mm_empty () calls from testcases; they complicate > things too much for no apparent benefit. > > With those issues fixed, the patchset is OK for gcc-10 when it opens. The new patch set starts at https://gcc.gnu.org/ml/gcc-patches/2019-02/msg01275.html including https://gcc.gnu.org/ml/gcc-patches/2019-02/msg01271.html for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89372 -- H.J.
[PATCH 40/42] i386: Allow MMX intrinsic emulation with SSE
Allow MMX intrinsic emulation with SSE/SSE2/SSSE3. Don't enable MMX ISA by default with TARGET_MMX_WITH_SSE. For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit mode since MMX intrinsics can be emulated with SSE. gcc/ PR target/89021 * config/i386/i386-builtin.def: Enable MMX intrinsics with SSE/SSE2/SSSE3. * config/i386/i386.c (ix86_init_mmx_sse_builtins): Likewise. (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX intrinsics with TARGET_MMX_WITH_SSE. * config/i386/mmintrin.h: Only require SSE2 if __MMX_WITH_SSE__ is defined. gcc/testsuite/ PR target/89021 * gcc.target/i386/pr82483-1.c: Error only on ia32. * gcc.target/i386/pr82483-2.c: Likewise. --- gcc/config/i386/i386-builtin.def | 126 +++--- gcc/config/i386/i386.c| 29 - gcc/config/i386/mmintrin.h| 12 ++- gcc/testsuite/gcc.target/i386/pr82483-1.c | 2 +- gcc/testsuite/gcc.target/i386/pr82483-2.c | 2 +- 5 files changed, 101 insertions(+), 70 deletions(-) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 88005f4687f..10a9d631f29 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKN BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID) /* MMX */ -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID) +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID) /* 3DNow! 
*/ BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID) @@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNO BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT) /* MMX */ -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI) - -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) - -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) - -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int)
[PATCH 26/42] i386: Emulate MMX umulv1siv1di3 with SSE2
Emulate MMX umulv1siv1di3 with SSE2. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (sse2_umulv1siv1di3): Add SSE emulation support. (*sse2_umulv1siv1di3): Add SSE2 emulation. --- gcc/config/i386/mmx.md | 26 -- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 993ad99a36e..9cf0251293a 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -905,30 +905,36 @@ (mult:V1DI (zero_extend:V1DI (vec_select:V1SI - (match_operand:V2SI 1 "nonimmediate_operand") + (match_operand:V2SI 1 "register_mmxmem_operand") (parallel [(const_int 0)]))) (zero_extend:V1DI (vec_select:V1SI - (match_operand:V2SI 2 "nonimmediate_operand") + (match_operand:V2SI 2 "register_mmxmem_operand") (parallel [(const_int 0)])] - "TARGET_SSE2" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);") (define_insn "*sse2_umulv1siv1di3" - [(set (match_operand:V1DI 0 "register_operand" "=y") + [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv") (mult:V1DI (zero_extend:V1DI (vec_select:V1SI - (match_operand:V2SI 1 "nonimmediate_operand" "%0") + (match_operand:V2SI 1 "register_mmxmem_operand" "%0,0,Yv") (parallel [(const_int 0)]))) (zero_extend:V1DI (vec_select:V1SI - (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv") (parallel [(const_int 0)])] - "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)" - "pmuludq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && TARGET_SSE2 + && ix86_binary_operator_ok (MULT, V2SImode, operands)" + "@ + pmuludq\t{%2, %0|%0, %2} + pmuludq\t{%2, %0|%0, %2} + vpmuludq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxmul,ssemul,ssemul") + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_v4hi3" [(set 
(match_operand:V4HI 0 "register_operand") -- 2.20.1
[PATCH 22/42] i386: Emulate MMX mmx_uavgv8qi3 with SSE
Emulate MMX mmx_uavgv8qi3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_uavgv8qi3): Also check TARGET_MMX and TARGET_MMX_WITH_SSE. (*mmx_uavgv8qi3): Add SSE emulation. --- gcc/config/i386/mmx.md | 25 +++-- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index d78c6a31962..570153521a1 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1678,50 +1678,55 @@ (plus:V8HI (plus:V8HI (zero_extend:V8HI - (match_operand:V8QI 1 "nonimmediate_operand")) + (match_operand:V8QI 1 "register_mmxmem_operand")) (zero_extend:V8HI - (match_operand:V8QI 2 "nonimmediate_operand"))) + (match_operand:V8QI 2 "register_mmxmem_operand"))) (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1] - "TARGET_SSE || TARGET_3DNOW" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);") (define_insn "*mmx_uavgv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") (truncate:V8QI (lshiftrt:V8HI (plus:V8HI (plus:V8HI (zero_extend:V8HI - (match_operand:V8QI 1 "nonimmediate_operand" "%0")) + (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv")) (zero_extend:V8HI - (match_operand:V8QI 2 "nonimmediate_operand" "ym"))) + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))) (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1] - "(TARGET_SSE || TARGET_3DNOW) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (PLUS, V8QImode, operands)" { /* These two instructions have the same operation, but their encoding is different. Prefer the one that is de facto standard. 
*/ - if (TARGET_SSE || TARGET_3DNOW_A) + if (TARGET_MMX_WITH_SSE && TARGET_AVX) +return "vpavgb\t{%2, %1, %0|%0, %1, %2}"; + else if (TARGET_SSE || TARGET_3DNOW_A) return "pavgb\t{%2, %0|%0, %2}"; else return "pavgusb\t{%2, %0|%0, %2}"; } - [(set_attr "type" "mmxshft") + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxshft,sseiadd,sseiadd") (set (attr "prefix_extra") (if_then_else (not (ior (match_test "TARGET_SSE") (match_test "TARGET_3DNOW_A"))) (const_string "1") (const_string "*"))) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_uavgv4hi3" [(set (match_operand:V4HI 0 "register_operand") -- 2.20.1
[PATCH 28/42] i386: Emulate MMX ssse3_phwv4hi3 with SSE
Emulate MMX ssse3_phwv4hi3 with SSE by moving bits 64:95 to bits 32:63 in SSE register. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (ssse3_phwv4hi3): Changed to define_insn_and_split to support SSE emulation. --- gcc/config/i386/sse.md | 34 ++ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 06c9b5b58f1..38b83c57ffc 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15232,13 +15232,13 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "ssse3_phwv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") +(define_insn_and_split "ssse3_phwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (vec_concat:V4HI (vec_concat:V2HI (ssse3_plusminus:HI (vec_select:HI - (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 1 "register_operand" "0,0,Yv") (parallel [(const_int 0)])) (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) (ssse3_plusminus:HI @@ -15247,19 +15247,37 @@ (vec_concat:V2HI (ssse3_plusminus:HI (vec_select:HI - (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv") (parallel [(const_int 0)])) (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))] - "TARGET_SSSE3" - "phw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" + "@ + phw\t{%2, %0|%0, %2} + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(const_int 0)] +{ + /* Generate SSE version of the operation. 
*/ + rtx op0 = lowpart_subreg (V8HImode, operands[0], + GET_MODE (operands[0])); + rtx op1 = lowpart_subreg (V8HImode, operands[1], + GET_MODE (operands[1])); + rtx op2 = lowpart_subreg (V8HImode, operands[2], + GET_MODE (operands[2])); + emit_insn (gen_ssse3_phwv8hi3 (op0, op1, op2)); + ix86_move_vector_high_sse_to_mmx (op0); + DONE; +} + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_insn "avx2_phdv8si3" [(set (match_operand:V8SI 0 "register_operand" "=x") -- 2.20.1
[PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled
With SSE emulation of MMX intrinsics, we should make _mm_empty () a NOP when MMX is disabled. PR target/89021 * config/i386/mmx.md (mmx_): Renamed to ... (mmx__1): This. (mmx_): New expander. --- gcc/config/i386/mmx.md | 29 - 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 9cf0251293a..0f925c0b1ea 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1848,7 +1848,34 @@ [(UNSPECV_EMMS "emms") (UNSPECV_FEMMS "femms")]) -(define_insn "mmx_" +(define_expand "mmx_" + [(unspec_volatile [(const_int 0)] EMMS) + (clobber (reg:XF ST0_REG)) + (clobber (reg:XF ST1_REG)) + (clobber (reg:XF ST2_REG)) + (clobber (reg:XF ST3_REG)) + (clobber (reg:XF ST4_REG)) + (clobber (reg:XF ST5_REG)) + (clobber (reg:XF ST6_REG)) + (clobber (reg:XF ST7_REG)) + (clobber (reg:DI MM0_REG)) + (clobber (reg:DI MM1_REG)) + (clobber (reg:DI MM2_REG)) + (clobber (reg:DI MM3_REG)) + (clobber (reg:DI MM4_REG)) + (clobber (reg:DI MM5_REG)) + (clobber (reg:DI MM6_REG)) + (clobber (reg:DI MM7_REG))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" +{ + if (TARGET_MMX) + emit_insn (gen_mmx__1 ()); + else + emit_insn (gen_nop ()); + DONE; +}) + +(define_insn "mmx__1" [(unspec_volatile [(const_int 0)] EMMS) (clobber (reg:XF ST0_REG)) (clobber (reg:XF ST1_REG)) -- 2.20.1
[PATCH 12/42] i386: Emulate MMX vec_dupv2si with SSE
Emulate MMX vec_dupv2si with SSE. Add the "Yw" constraint to allow broadcast from integer register for AVX512BW with TARGET_AVX512VL. Only SSE register source operand is allowed. PR target/89021 * config/i386/constraints.md (Yw): New constraint. * config/i386/mmx.md (*vec_dupv2si): Changed to define_insn_and_split and also allow TARGET_MMX_WITH_SSE to support SSE emulation. --- gcc/config/i386/constraints.md | 6 ++ gcc/config/i386/mmx.md | 24 +--- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 16075b4acf3..c546b20d9dc 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -110,6 +110,8 @@ ;; v any EVEX encodable SSE register for AVX512VL target, ;; otherwise any SSE register ;; h EVEX encodable SSE register with number factor of four +;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL +;; target. (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS" "First SSE register (@code{%xmm0}).") @@ -146,6 +148,10 @@ "TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS" "@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.") +(define_register_constraint "Yw" + "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS" + "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target.") + ;; We use the B prefix to denote any number of internal operands: ;; f FLAGS_REG ;; g GOT memory operand. 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index b0c6a8c8077..d568a534956 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1381,14 +1381,24 @@ (set_attr "length_immediate" "1") (set_attr "mode" "DI")]) -(define_insn "*vec_dupv2si" - [(set (match_operand:V2SI 0 "register_operand" "=y") +(define_insn_and_split "*vec_dupv2si" + [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw") (vec_duplicate:V2SI - (match_operand:SI 1 "register_operand" "0")))] - "TARGET_MMX" - "punpckldq\t%0, %0" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) + (match_operand:SI 1 "register_operand" "0,0,Yv,r")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "@ + punpckldq\t%0, %0 + # + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(set (match_dup 0) + (vec_duplicate:V4SI (match_dup 1)))] + "operands[0] = lowpart_subreg (V4SImode, operands[0], +GET_MODE (operands[0]));" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx,x64_avx") + (set_attr "type" "mmxcvt,ssemov,ssemov,ssemov") + (set_attr "mode" "DI,TI,TI,TI")]) (define_insn "*mmx_concatv2si" [(set (match_operand:V2SI 0 "register_operand" "=y,y") -- 2.20.1
[PATCH 37/42] Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE
From: Uros Bizjak 2019-02-14 Uroš Bizjak PR target/89021 * config/i386/i386.md (*zero_extendsidi2): Add mmx_isa attribute. * config/i386/sse.md (*vec_concatv2sf_sse4_1): Ditto. (*vec_concatv2sf_sse): Ditto. (*vec_concatv2si_sse4_1): Ditto. (*vec_concatv2si): Ditto. (*vec_concatv4si_0): Ditto. (*vec_concatv2di_0): Ditto. --- gcc/config/i386/i386.md | 4 gcc/config/i386/sse.md | 16 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e1727676deb..22172fd77a8 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -3682,6 +3682,10 @@ (const_string "avx512bw") ] (const_string "*"))) + (set (attr "mmx_isa") + (if_then_else (eq_attr "alternative" "5,6") + (const_string "native") + (const_string "*"))) (set (attr "type") (cond [(eq_attr "alternative" "0,1,2,4") (const_string "multi") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 379da16615d..b6196b088fd 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -7201,6 +7201,10 @@ (const_string "mmxmov") ] (const_string "sselog"))) + (set (attr "mmx_isa") + (if_then_else (eq_attr "alternative" "7,8") + (const_string "native") + (const_string "*"))) (set (attr "prefix_data16") (if_then_else (eq_attr "alternative" "3,4") (const_string "1") @@ -7236,7 +7240,8 @@ movss\t{%1, %0|%0, %1} punpckldq\t{%2, %0|%0, %2} movd\t{%1, %0|%0, %1}" - [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") + [(set_attr "mmx_isa" "*,*,native,native") + (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") (set_attr "mode" "V4SF,SF,DI,DI")]) (define_insn "*vec_concatv4sf" @@ -14509,6 +14514,10 @@ punpckldq\t{%2, %0|%0, %2} movd\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*") + (set (attr "mmx_isa") + (if_then_else (eq_attr "alternative" "8,9") + (const_string "native") + (const_string "*"))) (set (attr "type") (cond [(eq_attr "alternative" "7") (const_string "ssemov") @@ -14546,6 +14555,7 @@ punpckldq\t{%2, 
%0|%0, %2} movd\t{%1, %0|%0, %1}" [(set_attr "isa" "sse2,sse2,*,*,*,*") + (set_attr "mmx_isa" "*,*,*,*,native,native") (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov") (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")]) @@ -14575,7 +14585,8 @@ "@ %vmovq\t{%1, %0|%0, %1} movq2dq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") + [(set_attr "mmx_isa" "*,native") + (set_attr "type" "ssemov") (set_attr "prefix" "maybe_vex,orig") (set_attr "mode" "TI")]) @@ -14650,6 +14661,7 @@ %vmovq\t{%1, %0|%0, %1} movq2dq\t{%1, %0|%0, %1}" [(set_attr "isa" "x64,*,*") + (set_attr "mmx_isa" "*,*,native") (set_attr "type" "ssemov") (set_attr "prefix_rex" "1,*,*") (set_attr "prefix" "maybe_vex,maybe_vex,orig") -- 2.20.1
[PATCH 39/42] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
PR target/89021 * config/i386/i386.c (ix86_expand_vector_init_duplicate): Set mmx_ok to true if TARGET_MMX_WITH_SSE is true. (ix86_expand_vector_init_one_nonzero): Likewise. (ix86_expand_vector_init_one_var): Likewise. (ix86_expand_vector_init_general): Likewise. (ix86_expand_vector_init): Likewise. (ix86_expand_vector_set): Likewise. (ix86_expand_vector_extract): Likewise. * config/i386/mmx.md (*vec_dupv2sf): Changed to define_insn_and_split to support SSE emulation. (*vec_extractv2sf_0): Likewise. (*vec_extractv2sf_1): Likewise. (*vec_extractv2si_0): Likewise. (*vec_extractv2si_1): Likewise. (*vec_extractv2si_zext_mem): Likewise. (vec_setv2sf): Also allow TARGET_MMX_WITH_SSE. (vec_extractv2sf_1 splitter): Likewise. (vec_extractv2sfsf): Likewise. (vec_setv2si): Likewise. (vec_extractv2si_1 splitter): Likewise. (vec_extractv2sisi): Likewise. (vec_setv4hi): Likewise. (vec_extractv4hihi): Likewise. (vec_setv8qi): Likewise. (vec_extractv8qiqi): Likewise. --- gcc/config/i386/i386.c | 8 + gcc/config/i386/mmx.md | 69 +++--- 2 files changed, 52 insertions(+), 25 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a76c17beece..25e0dc43a9e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -42620,6 +42620,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, { bool ok; + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2SImode: @@ -42779,6 +42780,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, bool use_vector_set = false; rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL; + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2DImode: @@ -42972,6 +42974,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode, XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2DFmode: @@ -43357,6 +43360,7 @@ ix86_expand_vector_init_general (bool mmx_ok, 
machine_mode mode, machine_mode quarter_mode = VOIDmode; int n, i; + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2SFmode: @@ -43556,6 +43560,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) int i; rtx x; + mmx_ok |= TARGET_MMX_WITH_SSE; + /* Handle first initialization from vector elts. */ if (n_elts != XVECLEN (vals, 0)) { @@ -43655,6 +43661,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) machine_mode mmode = VOIDmode; rtx (*gen_blendm) (rtx, rtx, rtx, rtx); + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2SFmode: @@ -44010,6 +44017,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) bool use_vec_extr = false; rtx tmp; + mmx_ok |= TARGET_MMX_WITH_SSE; switch (mode) { case E_V2SImode: diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index a21e11c8dfb..fa0b0126e91 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -555,14 +555,23 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "V2SF")]) -(define_insn "*vec_dupv2sf" - [(set (match_operand:V2SF 0 "register_operand" "=y") +(define_insn_and_split "*vec_dupv2sf" + [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv") (vec_duplicate:V2SF - (match_operand:SF 1 "register_operand" "0")))] - "TARGET_MMX" - "punpckldq\t%0, %0" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) + (match_operand:SF 1 "register_operand" "0,0,Yv")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "@ + punpckldq\t%0, %0 + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(set (match_dup 0) + (vec_duplicate:V4SF (match_dup 1)))] + "operands[0] = lowpart_subreg (V4SFmode, operands[0], +GET_MODE (operands[0]));" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxcvt,ssemov,ssemov") + (set_attr "mode" "DI,TI,TI")]) (define_insn "*mmx_concatv2sf" [(set (match_operand:V2SF 0 "register_operand" "=y,y") @@ -580,7 +589,7 @@ [(match_operand:V2SF 0 "register_operand") (match_operand:SF 1 "register_operand") 
(match_operand 2 "const_int_operand")] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_set (false, operands[0], operands[1], INTVAL (operands[2])); @@ -594,11 +603,13 @@ (vec_select:SF (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m") (parallel [(const_int 0)])))] - "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + &&
[PATCH 41/42] i386: Enable TM MMX intrinsics with SSE2
This patch enables TM MMX intrinsics with SSE2 when MMX is disabled. PR target/89021 * config/i386/i386.c (bdesc_tm): Enable MMX intrinsics with SSE2. --- gcc/config/i386/i386.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 073a2534d1f..319a98f824a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -31065,13 +31065,13 @@ static const struct builtin_description bdesc_##kind[] = \ we're lazy. Add casts to make them fit. */ static const struct builtin_description bdesc_tm[] = { - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, + { OPTION_MASK_ISA_MMX | 
OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, @@ -31089,7 +31089,7 @@ static const struct builtin_description bdesc_tm[] = { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID }, { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID }, { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, 
"__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID }, }; -- 2.20.1
[PATCH 21/42] i386: Emulate MMX maskmovq with SSE2 maskmovdqu
Emulate MMX maskmovq with SSE2 maskmovdqu for TARGET_MMX_WITH_SSE by zero-extending source and mask operands to 128 bits. Handle unmapped bits 64:127 at memory address by adjusting source and mask operands together with memory address. PR target/89021 * config/i386/xmmintrin.h: Emulate MMX maskmovq with SSE2 maskmovdqu for __MMX_WITH_SSE__. --- gcc/config/i386/xmmintrin.h | 61 + 1 file changed, 61 insertions(+) diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 58284378514..a915f6c87d7 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -1165,7 +1165,68 @@ _m_pshufw (__m64 __A, int const __N) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P) { +#ifdef __MMX_WITH_SSE__ + /* Emulate MMX maskmovq with SSE2 maskmovdqu and handle unmapped bits + 64:127 at address __P. */ + typedef long long __v2di __attribute__ ((__vector_size__ (16))); + typedef char __v16qi __attribute__ ((__vector_size__ (16))); + /* Zero-extend __A and __N to 128 bits. */ + __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 }; + __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 }; + + /* Check the alignment of __P. */ + __SIZE_TYPE__ offset = ((__SIZE_TYPE__) __P) & 0xf; + if (offset) +{ + /* If the misalignment of __P > 8, subtract __P by 8 bytes. +Otherwise, subtract __P by the misalignment. */ + if (offset > 8) + offset = 8; + __P = (char *) (((__SIZE_TYPE__) __P) - offset); + + /* Shift __A128 and __N128 to the left by the adjustment. 
*/ + switch (offset) + { + case 1: + __A128 = __builtin_ia32_pslldqi128 (__A128, 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 8); + break; + case 2: + __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8); + break; + case 3: + __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8); + break; + case 4: + __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8); + break; + case 5: + __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8); + break; + case 6: + __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8); + break; + case 7: + __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8); + break; + case 8: + __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8); + __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8); + break; + default: + break; + } +} + __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P); +#else __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P); +#endif } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -- 2.20.1
[PATCH 25/42] i386: Emulate MMX movntq with SSE2 movntidi
Emulate MMX movntq with SSE2 movntidi. Only register source operand is allowed. PR target/89021 * config/i386/mmx.md (sse_movntq): Add SSE2 emulation. --- gcc/config/i386/mmx.md | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index bcce7c06c4f..993ad99a36e 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -214,12 +214,16 @@ }) (define_insn "sse_movntq" - [(set (match_operand:DI 0 "memory_operand" "=m") - (unspec:DI [(match_operand:DI 1 "register_operand" "y")] + [(set (match_operand:DI 0 "memory_operand" "=m,m") + (unspec:DI [(match_operand:DI 1 "register_operand" "y,r")] UNSPEC_MOVNTQ))] - "TARGET_SSE || TARGET_3DNOW_A" - "movntq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxmov") + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" + "@ + movntq\t{%1, %0|%0, %1} + movnti\t{%1, %0|%0, %1}" + [(set_attr "mmx_isa" "native,x64") + (set_attr "type" "mmxmov,ssemov") (set_attr "mode" "DI")]) ; -- 2.20.1
[PATCH 35/42] i386: Emulate MMX abs<mode>2 with SSE
Emulate MMX abs<mode>2 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (abs<mode>2): Add SSE emulation. --- gcc/config/i386/sse.md | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ec68b5dc2ce..92f5ad17156 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15942,16 +15942,19 @@ }) (define_insn "abs<mode>2" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv") (abs:MMXMODEI - (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] - "TARGET_SSSE3" - "pabs<mmxvecsize>\t{%1, %0|%0, %1}"; - [(set_attr "type" "sselog1") + (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" + "@ + pabs<mmxvecsize>\t{%1, %0|%0, %1} + %vpabs<mmxvecsize>\t{%1, %0|%0, %1}" + [(set_attr "mmx_isa" "native,x64") + (set_attr "type" "sselog1") (set_attr "prefix_rep" "0") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI")]) ; ;; -- 2.20.1
[PATCH 24/42] i386: Emulate MMX mmx_psadbw with SSE
Emulate MMX mmx_psadbw with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_psadbw): Add SSE emulation. --- gcc/config/i386/mmx.md | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index b8983e1755a..bcce7c06c4f 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1770,14 +1770,19 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_psadbw" - [(set (match_operand:V1DI 0 "register_operand" "=y") -(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv") +(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")] UNSPEC_PSADBW))] - "TARGET_SSE || TARGET_3DNOW_A" - "psadbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" + "@ + psadbw\t{%2, %0|%0, %2} + psadbw\t{%2, %0|%0, %2} + vpsadbw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxshft,sseiadd,sseiadd") + (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_pmovmskb" [(set (match_operand:SI 0 "register_operand" "=r,r") -- 2.20.1
[PATCH 31/42] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
Emulate MMX ssse3_pmulhrswv4hi3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (*ssse3_pmulhrswv4hi3): Add SSE emulation. --- gcc/config/i386/sse.md | 20 +--- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index dc07173cb1c..80b1a46f507 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15652,25 +15652,31 @@ (set_attr "mode" "")]) (define_insn "*ssse3_pmulhrswv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (truncate:V4HI (lshiftrt:V4SI (plus:V4SI (lshiftrt:V4SI (mult:V4SI (sign_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")) (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))) (const_int 14)) (match_operand:V4HI 3 "const1_operand")) (const_int 1] - "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "pmulhrsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && TARGET_SSSE3 + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + pmulhrsw\t{%2, %0|%0, %2} + pmulhrsw\t{%2, %0|%0, %2} + vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "sseimul") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_insn "_pshufb3" [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") -- 2.20.1
[PATCH 38/42] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
PR target/89021 * config/i386/mmx.md (MMXMODE:mov): Also allow TARGET_MMX_WITH_SSE. (MMXMODE:*mov_internal): Likewise. (MMXMODE:movmisalign): Likewise. --- gcc/config/i386/mmx.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 0f925c0b1ea..a21e11c8dfb 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -70,7 +70,7 @@ (define_expand "mov" [(set (match_operand:MMXMODE 0 "nonimmediate_operand") (match_operand:MMXMODE 1 "nonimmediate_operand"))] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_move (mode, operands); DONE; @@ -81,7 +81,7 @@ "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x") (match_operand:MMXMODE 1 "nonimm_or_0_operand" "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))] - "TARGET_MMX + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -207,7 +207,7 @@ (define_expand "movmisalign" [(set (match_operand:MMXMODE 0 "nonimmediate_operand") (match_operand:MMXMODE 1 "nonimmediate_operand"))] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_move (mode, operands); DONE; -- 2.20.1
[PATCH 36/42] i386: Correct <ssse3_avx2>_pmulhrsw<mode>3[_mask]
There is no V4HI pmulhrsw in AVX512BW and V4HI/V8HI pmulhrsw don't require AVX2. To support TARGET_MMX_WITH_SSE, replace nonimmediate_operand with register_pmulhrswmem_operand in _pmulhrsw3. PR target/89372 * config/i386/predicates.md (register_pmulhrswmem_operand): New. * config/i386/sse.md (PMULHRSW): Remove V4HI. (PMULHRSW_MMX): New. (_pmulhrsw3): Replace PMULHRSW with PMULHRSW_MMX. Require TARGET_SSSE3, not TARGET_AVX2. Replace nonimmediate_operand with register_pmulhrswmem_operand. --- gcc/config/i386/predicates.md | 7 +++ gcc/config/i386/sse.md| 15 +-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index f3c2f72de54..b7cb26a81fe 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -56,6 +56,13 @@ (and (not (match_test "TARGET_MMX_WITH_SSE")) (match_operand 0 "memory_operand" +;; Match register operands, but include memory operands for +;; !(TARGET_MMX_WITH_SSE && mode == V4HImode). +(define_predicate "register_pmulhrswmem_operand" + (ior (match_operand 0 "register_operand") + (and (not (match_test "TARGET_MMX_WITH_SSE && mode == V4HImode")) + (match_operand 0 "memory_operand" + ;; True if the operand is an SSE register. 
(define_predicate "sse_reg_operand" (and (match_code "reg") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 92f5ad17156..379da16615d 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15579,7 +15579,7 @@ (set_attr "mode" "DI,TI,TI")]) (define_mode_iterator PMULHRSW - [V4HI V8HI (V16HI "TARGET_AVX2")]) + [V8HI (V16HI "TARGET_AVX2")]) (define_expand "_pmulhrsw3_mask" [(set (match_operand:PMULHRSW 0 "register_operand") @@ -15604,21 +15604,24 @@ ix86_fixup_binary_operands_no_copy (MULT, mode, operands); }) +(define_mode_iterator PMULHRSW_MMX + [V4HI V8HI (V16HI "TARGET_AVX2")]) + (define_expand "_pmulhrsw3" - [(set (match_operand:PMULHRSW 0 "register_operand") - (truncate:PMULHRSW + [(set (match_operand:PMULHRSW_MMX 0 "register_operand") + (truncate:PMULHRSW_MMX (lshiftrt: (plus: (lshiftrt: (mult: (sign_extend: - (match_operand:PMULHRSW 1 "nonimmediate_operand")) + (match_operand:PMULHRSW_MMX 1 "register_pmulhrswmem_operand")) (sign_extend: - (match_operand:PMULHRSW 2 "nonimmediate_operand"))) + (match_operand:PMULHRSW_MMX 2 "register_pmulhrswmem_operand"))) (const_int 14)) (match_dup 3)) (const_int 1] - "TARGET_AVX2" + "TARGET_SSSE3" { operands[3] = CONST1_RTX(mode); ix86_fixup_binary_operands_no_copy (MULT, mode, operands); -- 2.20.1
[PATCH 30/42] i386: Emulate MMX ssse3_pmaddubsw with SSE
Emulate MMX ssse3_pmaddubsw with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (ssse3_pmaddubsw): Add SSE emulation. --- gcc/config/i386/sse.md | 18 +++--- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0565ddc177f..dc07173cb1c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15544,17 +15544,17 @@ (set_attr "mode" "TI")]) (define_insn "ssse3_pmaddubsw" - [(set (match_operand:V4HI 0 "register_operand" "=y") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (ss_plus:V4HI (mult:V4HI (zero_extend:V4HI (vec_select:V4QI - (match_operand:V8QI 1 "register_operand" "0") + (match_operand:V8QI 1 "register_operand" "0,0,Yv") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))) (sign_extend:V4HI (vec_select:V4QI - (match_operand:V8QI 2 "nonimmediate_operand" "ym") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)] (mult:V4HI @@ -15566,13 +15566,17 @@ (vec_select:V4QI (match_dup 2) (parallel [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))] - "TARGET_SSSE3" - "pmaddubsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" + "@ + pmaddubsw\t{%2, %0|%0, %2} + pmaddubsw\t{%2, %0|%0, %2} + vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "simul") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_mode_iterator PMULHRSW [V4HI V8HI (V16HI "TARGET_AVX2")]) -- 2.20.1
[PATCH 29/42] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE
Emulate MMX ssse3_phdv2si3 with SSE by moving bits 64:95 to bits 32:63 in SSE register. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (ssse3_phdv2si3): Changed to define_insn_and_split to support SSE emulation. --- gcc/config/i386/sse.md | 34 ++ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 38b83c57ffc..0565ddc177f 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15356,26 +15356,44 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "ssse3_phdv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") +(define_insn_and_split "ssse3_phdv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv") (vec_concat:V2SI (plusminus:SI (vec_select:SI - (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 1 "register_operand" "0,0,Yv") (parallel [(const_int 0)])) (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) (plusminus:SI (vec_select:SI - (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv") (parallel [(const_int 0)])) (vec_select:SI (match_dup 2) (parallel [(const_int 1)])] - "TARGET_SSSE3" - "phd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" + "@ + phd\t{%2, %0|%0, %2} + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(const_int 0)] +{ + /* Generate SSE version of the operation. 
*/ + rtx op0 = lowpart_subreg (V4SImode, operands[0], + GET_MODE (operands[0])); + rtx op1 = lowpart_subreg (V4SImode, operands[1], + GET_MODE (operands[1])); + rtx op2 = lowpart_subreg (V4SImode, operands[2], + GET_MODE (operands[2])); + emit_insn (gen_ssse3_phdv4si3 (op0, op1, op2)); + ix86_move_vector_high_sse_to_mmx (op0); + DONE; +} + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_insn "avx2_pmaddubsw256" [(set (match_operand:V16HI 0 "register_operand" "=x,v") -- 2.20.1
[PATCH 33/42] i386: Emulate MMX ssse3_psign<mode>3 with SSE
Emulate MMX ssse3_psign<mode>3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (ssse3_psign<mode>3): Add SSE emulation. --- gcc/config/i386/sse.md | 18 +++--- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 704e211c0b8..c2dbd59049a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15755,17 +15755,21 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "ssse3_psign<mode>3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") (unspec:MMXMODEI - [(match_operand:MMXMODEI 1 "register_operand" "0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")] + [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv") + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")] UNSPEC_PSIGN))] - "TARGET_SSSE3" - "psign<mmxvecsize>\t{%2, %0|%0, %2}"; - [(set_attr "type" "sselog1") + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" + "@ + psign<mmxvecsize>\t{%2, %0|%0, %2} + psign<mmxvecsize>\t{%2, %0|%0, %2} + vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI,TI,TI")]) (define_insn "<ssse3_avx2>_palignr<mode>_mask" [(set (match_operand:VI1_AVX512 0 "register_operand" "=v") -- 2.20.1
[PATCH 34/42] i386: Emulate MMX ssse3_palignrdi with SSE
Emulate MMX version of palignrq with SSE version by concatenating 2 64-bit MMX operands into a single 128-bit SSE operand, followed by SSE psrldq. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (ssse3_palignrdi): Changed to define_insn_and_split to support SSE emulation. --- gcc/config/i386/sse.md | 58 ++ 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c2dbd59049a..ec68b5dc2ce 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15824,23 +15824,61 @@ (set_attr "prefix" "orig,vex,evex") (set_attr "mode" "")]) -(define_insn "ssse3_palignrdi" - [(set (match_operand:DI 0 "register_operand" "=y") - (unspec:DI [(match_operand:DI 1 "register_operand" "0") - (match_operand:DI 2 "nonimmediate_operand" "ym") - (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] +(define_insn_and_split "ssse3_palignrdi" + [(set (match_operand:DI 0 "register_operand" "=y,x,Yv") + (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv") + (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")] UNSPEC_PALIGNR))] - "TARGET_SSSE3" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" { - operands[3] = GEN_INT (INTVAL (operands[3]) / 8); - return "palignr\t{%3, %2, %0|%0, %2, %3}"; + switch (which_alternative) +{ +case 0: + operands[3] = GEN_INT (INTVAL (operands[3]) / 8); + return "palignr\t{%3, %2, %0|%0, %2, %3}"; +case 1: +case 2: + return "#"; +default: + gcc_unreachable (); +} } - [(set_attr "type" "sseishft") + "TARGET_MMX_WITH_SSE && reload_completed" + [(set (match_dup 0) + (lshiftrt:V1TI (match_dup 0) (match_dup 3)))] +{ + /* Emulate MMX palignrdi with SSE psrldq. */ + rtx op0 = lowpart_subreg (V2DImode, operands[0], + GET_MODE (operands[0])); + rtx insn; + if (TARGET_AVX) +insn = gen_vec_concatv2di (op0, operands[2], operands[1]); + else +{ + /* NB: SSE can only concatenate OP0 and OP1 to OP0. 
*/ + insn = gen_vec_concatv2di (op0, operands[1], operands[2]); + emit_insn (insn); + /* Swap bits 0:63 with bits 64:127. */ + rtx mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (4, GEN_INT (2), + GEN_INT (3), + GEN_INT (0), + GEN_INT (1))); + rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0)); + rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask); + insn = gen_rtx_SET (op1, op2); +} + emit_insn (insn); + operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0)); +} + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "sseishft") (set_attr "atom_unit" "sishuf") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI ;; modes for abs instruction on pre AVX-512 targets. -- 2.20.1
[PATCH 19/42] i386: Emulate MMX mmx_pmovmskb with SSE
Emulate MMX mmx_pmovmskb with SSE by zero-extending result of SSE pmovmskb from QImode to SImode. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_pmovmskb): Changed to define_insn_and_split to support SSE emulation. --- gcc/config/i386/mmx.md | 30 +++--- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 058791e01e6..9c552f929f1 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1762,14 +1762,30 @@ [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "mmx_pmovmskb" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] +(define_insn_and_split "mmx_pmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")] UNSPEC_MOVMSK))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" + "@ + pmovmskb\t{%1, %0|%0, %1} + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(set (match_dup 0) +(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)) + (set (match_dup 0) + (zero_extend:SI (match_dup 2)))] +{ + /* Generate SSE pmovmskb and zero-extend from QImode to SImode. */ + operands[1] = lowpart_subreg (V16QImode, operands[1], + GET_MODE (operands[1])); + operands[2] = lowpart_subreg (QImode, operands[0], + GET_MODE (operands[0])); +} + [(set_attr "mmx_isa" "native,x64") + (set_attr "type" "mmxcvt,ssemov") + (set_attr "mode" "DI,TI")]) (define_expand "mmx_maskmovq" [(set (match_operand:V8QI 0 "memory_operand") -- 2.20.1
[PATCH 15/42] i386: Emulate MMX sse_cvtpi2ps with SSE
Emulate MMX sse_cvtpi2ps with SSE2 cvtdq2ps, preserving upper 64 bits of destination XMM register. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (sse_cvtpi2ps): Changed to define_insn_and_split. Also allow TARGET_MMX_WITH_SSE. Add SSE emulation. --- gcc/config/i386/sse.md | 64 -- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 70e3669d115..06c9b5b58f1 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4561,16 +4561,64 @@ ;; ; -(define_insn "sse_cvtpi2ps" - [(set (match_operand:V4SF 0 "register_operand" "=x") +(define_insn_and_split "sse_cvtpi2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv") (vec_merge:V4SF (vec_duplicate:V4SF - (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym"))) - (match_operand:V4SF 1 "register_operand" "0") - (const_int 3)))] - "TARGET_SSE" - "cvtpi2ps\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") + (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))) + (match_operand:V4SF 1 "register_operand" "0,0,Yv") + (const_int 3))) + (clobber (match_scratch:V4SF 3 "=X,x,Yv"))] + "TARGET_SSE || TARGET_MMX_WITH_SSE" + "@ + cvtpi2ps\t{%2, %0|%0, %2} + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(const_int 0)] +{ + rtx op2 = lowpart_subreg (V4SImode, operands[2], + GET_MODE (operands[2])); + /* Generate SSE2 cvtdq2ps. */ + rtx insn = gen_floatv4siv4sf2 (operands[3], op2); + emit_insn (insn); + + /* Merge operands[3] with operands[0]. */ + rtx mask, op1; + if (TARGET_AVX) +{ + mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (4, GEN_INT (0), GEN_INT (1), + GEN_INT (6), GEN_INT (7))); + op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]); + op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask); + insn = gen_rtx_SET (operands[0], op2); +} + else +{ + /* NB: SSE can only concatenate OP0 and OP3 to OP0. 
*/ + mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (4, GEN_INT (2), GEN_INT (3), + GEN_INT (4), GEN_INT (5))); + op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]); + op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask); + insn = gen_rtx_SET (operands[0], op2); + emit_insn (insn); + + /* Swap bits 0:63 with bits 64:127. */ + mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (4, GEN_INT (2), GEN_INT (3), + GEN_INT (0), GEN_INT (1))); + rtx dest = lowpart_subreg (V4SImode, operands[0], +GET_MODE (operands[0])); + op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask); + insn = gen_rtx_SET (dest, op1); +} + emit_insn (insn); + DONE; +} + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "ssecvt") (set_attr "mode" "V4SF")]) (define_insn "sse_cvtps2pi" -- 2.20.1
[PATCH 17/42] i386: Emulate MMX mmx_pinsrw with SSE
Emulate MMX mmx_pinsrw with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_pinsrw): Also check TARGET_MMX and TARGET_MMX_WITH_SSE. (*mmx_pinsrw): Add SSE emulation. --- gcc/config/i386/mmx.md | 33 +++-- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 22547c7da6f..1e68d1bb338 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1282,32 +1282,45 @@ (match_operand:SI 2 "nonimmediate_operand")) (match_operand:V4HI 1 "register_operand") (match_operand:SI 3 "const_0_to_3_operand")))] - "TARGET_SSE || TARGET_3DNOW_A" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" { operands[2] = gen_lowpart (HImode, operands[2]); operands[3] = GEN_INT (1 << INTVAL (operands[3])); }) (define_insn "*mmx_pinsrw" - [(set (match_operand:V4HI 0 "register_operand" "=y") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (vec_merge:V4HI (vec_duplicate:V4HI -(match_operand:HI 2 "nonimmediate_operand" "rm")) - (match_operand:V4HI 1 "register_operand" "0") +(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm")) + (match_operand:V4HI 1 "register_operand" "0,0,Yv") (match_operand:SI 3 "const_int_operand")))] - "(TARGET_SSE || TARGET_3DNOW_A) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A) && ((unsigned) exact_log2 (INTVAL (operands[3])) < GET_MODE_NUNITS (V4HImode))" { operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); - if (MEM_P (operands[2])) -return "pinsrw\t{%3, %2, %0|%0, %2, %3}"; + if (TARGET_MMX_WITH_SSE && TARGET_AVX) +{ + if (MEM_P (operands[2])) + return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + else + return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; +} else -return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; +{ + if (MEM_P (operands[2])) + return "pinsrw\t{%3, %2, %0|%0, %2, %3}"; + else + return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; +} } - [(set_attr "type" "mmxcvt") + [(set_attr 
"mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxcvt,sselog,sselog") (set_attr "length_immediate" "1") - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_pextrw" [(set (match_operand:SI 0 "register_operand" "=r,r") -- 2.20.1
[PATCH 20/42] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
Emulate MMX mmx_umulv4hi3_highpart with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_umulv4hi3_highpart): Also check TARGET_MMX and TARGET_MMX_WITH_SSE. (*mmx_umulv4hi3_highpart): Add SSE emulation. --- gcc/config/i386/mmx.md | 26 -- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 9c552f929f1..d78c6a31962 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -781,28 +781,34 @@ (lshiftrt:V4SI (mult:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand")) + (match_operand:V4HI 1 "register_mmxmem_operand")) (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand"))) + (match_operand:V4HI 2 "register_mmxmem_operand"))) (const_int 16] - "TARGET_SSE || TARGET_3DNOW_A" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_umulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (truncate:V4HI (lshiftrt:V4SI (mult:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")) (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))) (const_int 16] - "(TARGET_SSE || TARGET_3DNOW_A) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (MULT, V4HImode, operands)" - "pmulhuw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) + "@ + pmulhuw\t{%2, %0|%0, %2} + pmulhuw\t{%2, %0|%0, %2} + vpmulhuw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxmul,ssemul,ssemul") + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_pmaddwd" [(set (match_operand:V2SI 0 "register_operand") -- 
2.20.1
[PATCH 13/42] i386: Emulate MMX pshufw with SSE
Emulate MMX pshufw with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_pshufw): Also check TARGET_MMX and TARGET_MMX_WITH_SSE. (mmx_pshufw_1): Add SSE emulation. (*vec_dupv4hi): Changed to define_insn_and_split and also allow TARGET_MMX_WITH_SSE to support SSE emulation. --- gcc/config/i386/mmx.md | 81 +- 1 file changed, 65 insertions(+), 16 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index d568a534956..43f85064cd9 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1323,9 +1323,10 @@ (define_expand "mmx_pshufw" [(match_operand:V4HI 0 "register_operand") - (match_operand:V4HI 1 "nonimmediate_operand") + (match_operand:V4HI 1 "register_mmxmem_operand") (match_operand:SI 2 "const_int_operand")] - "TARGET_SSE || TARGET_3DNOW_A" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" { int mask = INTVAL (operands[2]); emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1], @@ -1337,14 +1338,15 @@ }) (define_insn "mmx_pshufw_1" - [(set (match_operand:V4HI 0 "register_operand" "=y") + [(set (match_operand:V4HI 0 "register_operand" "=y,Yv") (vec_select:V4HI - (match_operand:V4HI 1 "nonimmediate_operand" "ym") + (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yv") (parallel [(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") (match_operand 5 "const_0_to_3_operand")])))] - "TARGET_SSE || TARGET_3DNOW_A" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" { int mask = 0; mask |= INTVAL (operands[2]) << 0; @@ -1353,11 +1355,20 @@ mask |= INTVAL (operands[5]) << 6; operands[2] = GEN_INT (mask); - return "pshufw\t{%2, %1, %0|%0, %1, %2}"; + switch (which_alternative) +{ +case 0: + return "pshufw\t{%2, %1, %0|%0, %1, %2}"; +case 1: + return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}"; +default: + gcc_unreachable (); +} } - [(set_attr "type" "mmxcvt") + [(set_attr "mmx_isa" 
"native,x64") + (set_attr "type" "mmxcvt,sselog") (set_attr "length_immediate" "1") - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI")]) (define_insn "mmx_pswapdv2si2" [(set (match_operand:V2SI 0 "register_operand" "=y") @@ -1370,16 +1381,54 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) -(define_insn "*vec_dupv4hi" - [(set (match_operand:V4HI 0 "register_operand" "=y") +(define_insn_and_split "*vec_dupv4hi" + [(set (match_operand:V4HI 0 "register_operand" "=y,Yv,Yw") (vec_duplicate:V4HI (truncate:HI - (match_operand:SI 1 "register_operand" "0"] - "TARGET_SSE || TARGET_3DNOW_A" - "pshufw\t{$0, %0, %0|%0, %0, 0}" - [(set_attr "type" "mmxcvt") - (set_attr "length_immediate" "1") - (set_attr "mode" "DI")]) + (match_operand:SI 1 "register_operand" "0,Yv,r"] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" + "@ + pshufw\t{$0, %0, %0|%0, %0, 0} + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(const_int 0)] +{ + rtx op; + operands[0] = lowpart_subreg (V8HImode, operands[0], + GET_MODE (operands[0])); + if (TARGET_AVX2) +{ + operands[1] = lowpart_subreg (HImode, operands[1], + GET_MODE (operands[1])); + op = gen_rtx_VEC_DUPLICATE (V8HImode, operands[1]); +} + else +{ + operands[1] = lowpart_subreg (V8HImode, operands[1], + GET_MODE (operands[1])); + rtx mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (8, + GEN_INT (0), + GEN_INT (0), + GEN_INT (0), + GEN_INT (0), + GEN_INT (4), + GEN_INT (5), + GEN_INT (6), + GEN_INT (7))); + + op = gen_rtx_VEC_SELECT (V8HImode, operands[1], mask); +} + rtx insn = gen_rtx_SET (operands[0], op); + emit_insn (insn); + DONE; +} + [(set_attr "mmx_isa" "native,x64,x64_avx") + (set_attr "type" "mmxcvt,sselog1,ssemov") + (set_attr "length_immediate" "1,1,0") + (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "*vec_dupv2si" [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw") -- 2.20.1
[PATCH 14/42] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE. PR target/89021 * config/i386/sse.md (sse_cvtps2pi): Add SSE emulation. (sse_cvttps2pi): Likewise. --- gcc/config/i386/sse.md | 30 ++ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c8e0133560a..70e3669d115 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4574,26 +4574,32 @@ (set_attr "mode" "V4SF")]) (define_insn "sse_cvtps2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") + [(set (match_operand:V2SI 0 "register_operand" "=y,Yv") (vec_select:V2SI - (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] + (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm")] UNSPEC_FIX_NOTRUNC) (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" - "cvtps2pi\t{%1, %0|%0, %q1}" - [(set_attr "type" "ssecvt") - (set_attr "unit" "mmx") + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE" + "@ + cvtps2pi\t{%1, %0|%0, %q1} + %vcvtps2dq\t{%1, %0|%0, %1}" + [(set_attr "mmx_isa" "native,x64") + (set_attr "type" "ssecvt") + (set_attr "unit" "mmx,*") (set_attr "mode" "DI")]) (define_insn "sse_cvttps2pi" - [(set (match_operand:V2SI 0 "register_operand" "=y") + [(set (match_operand:V2SI 0 "register_operand" "=y,Yv") (vec_select:V2SI - (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) + (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm")) (parallel [(const_int 0) (const_int 1)])))] - "TARGET_SSE" - "cvttps2pi\t{%1, %0|%0, %q1}" - [(set_attr "type" "ssecvt") - (set_attr "unit" "mmx") + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE" + "@ + cvttps2pi\t{%1, %0|%0, %q1} + %vcvttps2dq\t{%1, %0|%0, %1}" + [(set_attr "mmx_isa" "native,x64") + (set_attr "type" "ssecvt") + (set_attr "unit" "mmx,*") (set_attr "prefix_rep" "0") (set_attr "mode" "SF")]) -- 2.20.1
[PATCH 23/42] i386: Emulate MMX mmx_uavgv4hi3 with SSE
Emulate MMX mmx_uavgv4hi3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_uavgv4hi3): Also check TARGET_MMX and TARGET_MMX_WITH_SSE. (*mmx_uavgv4hi3): Add SSE emulation. --- gcc/config/i386/mmx.md | 26 -- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 570153521a1..b8983e1755a 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1735,33 +1735,39 @@ (plus:V4SI (plus:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand")) + (match_operand:V4HI 1 "register_mmxmem_operand")) (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand"))) + (match_operand:V4HI 2 "register_mmxmem_operand"))) (const_vector:V4SI [(const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1] - "TARGET_SSE || TARGET_3DNOW_A" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);") (define_insn "*mmx_uavgv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (truncate:V4HI (lshiftrt:V4SI (plus:V4SI (plus:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")) (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))) (const_vector:V4SI [(const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1] - "(TARGET_SSE || TARGET_3DNOW_A) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (PLUS, V4HImode, operands)" - "pavgw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) + "@ + pavgw\t{%2, %0|%0, %2} + pavgw\t{%2, %0|%0, %2} + vpavgw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" 
"mmxshft,sseiadd,sseiadd") + (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_psadbw" [(set (match_operand:V1DI 0 "register_operand" "=y") -- 2.20.1
[PATCH 05/42] i386: Emulate MMX mulv4hi3 with SSE
Emulate MMX mulv4hi3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_mulv4hi3): Also allow TARGET_MMX_WITH_SSE. (mulv4hi3): New. (*mmx_mulv4hi3): Also allow TARGET_MMX_WITH_SSE. Add SSE support. --- gcc/config/i386/mmx.md | 32 ++-- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 517c3283963..cdb0f698001 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -716,19 +716,31 @@ (define_expand "mmx_mulv4hi3" [(set (match_operand:V4HI 0 "register_operand") -(mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand") - (match_operand:V4HI 2 "nonimmediate_operand")))] - "TARGET_MMX" +(mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand") + (match_operand:V4HI 2 "register_mmxmem_operand")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") + +(define_expand "mulv4hi3" + [(set (match_operand:V4HI 0 "register_operand") +(mult:V4HI (match_operand:V4HI 1 "register_operand") + (match_operand:V4HI 2 "register_operand")))] + "TARGET_MMX_WITH_SSE" "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_mulv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") -(mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)" - "pmullw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") +(mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "@ + pmullw\t{%2, %0|%0, %2} + pmullw\t{%2, %0|%0, %2} + vpmullw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + 
(set_attr "type" "mmxmul,ssemul,ssemul") + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_smulv4hi3_highpart" [(set (match_operand:V4HI 0 "register_operand") -- 2.20.1
[PATCH 06/42] i386: Emulate MMX smulv4hi3_highpart with SSE
Emulate MMX smulv4hi3_highpart with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_smulv4hi3_highpart): Also allow TARGET_MMX_WITH_SSE. (*mmx_smulv4hi3_highpart): Also allow TARGET_MMX_WITH_SSE. Add SSE support. --- gcc/config/i386/mmx.md | 25 +++-- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index cdb0f698001..3a7964d52bb 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -748,27 +748,32 @@ (lshiftrt:V4SI (mult:V4SI (sign_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand")) + (match_operand:V4HI 1 "register_mmxmem_operand")) (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand"))) + (match_operand:V4HI 2 "register_mmxmem_operand"))) (const_int 16] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_smulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (truncate:V4HI (lshiftrt:V4SI (mult:V4SI (sign_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")) (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))) (const_int 16] - "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)" - "pmulhw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && ix86_binary_operator_ok (MULT, V4HImode, operands)" + "@ + pmulhw\t{%2, %0|%0, %2} + pmulhw\t{%2, %0|%0, %2} + vpmulhw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxmul,ssemul,ssemul") + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_umulv4hi3_highpart" [(set (match_operand:V4HI 0 "register_operand") -- 2.20.1
[PATCH 18/42] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_v4hi3): Also check TARGET_MMX and TARGET_MMX_WITH_SSE. (mmx_v8qi3): Likewise. (smaxmin:v4hi3): New. (umaxmin:v8qi3): Likewise. (smaxmin:*mmx_v4hi3): Add SSE emulation. (umaxmin:*mmx_v8qi3): Likewise. --- gcc/config/i386/mmx.md | 68 +- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 1e68d1bb338..058791e01e6 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -923,40 +923,68 @@ (define_expand "mmx_v4hi3" [(set (match_operand:V4HI 0 "register_operand") (smaxmin:V4HI - (match_operand:V4HI 1 "nonimmediate_operand") - (match_operand:V4HI 2 "nonimmediate_operand")))] - "TARGET_SSE || TARGET_3DNOW_A" + (match_operand:V4HI 1 "register_mmxmem_operand") + (match_operand:V4HI 2 "register_mmxmem_operand")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" + "ix86_fixup_binary_operands_no_copy (, V4HImode, operands);") + +(define_expand "v4hi3" + [(set (match_operand:V4HI 0 "register_operand") +(smaxmin:V4HI + (match_operand:V4HI 1 "register_operand") + (match_operand:V4HI 2 "register_operand")))] + "TARGET_MMX_WITH_SSE" "ix86_fixup_binary_operands_no_copy (, V4HImode, operands);") (define_insn "*mmx_v4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (smaxmin:V4HI - (match_operand:V4HI 1 "nonimmediate_operand" "%0") - (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] - "(TARGET_SSE || TARGET_3DNOW_A) + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (, V4HImode, operands)" - "pw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) + "@ + pw\t{%2, %0|%0, %2} + 
pw\t{%2, %0|%0, %2} + vpw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxadd,sseiadd,sseiadd") + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_v8qi3" [(set (match_operand:V8QI 0 "register_operand") (umaxmin:V8QI - (match_operand:V8QI 1 "nonimmediate_operand") - (match_operand:V8QI 2 "nonimmediate_operand")))] - "TARGET_SSE || TARGET_3DNOW_A" + (match_operand:V8QI 1 "register_mmxmem_operand") + (match_operand:V8QI 2 "register_mmxmem_operand")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" + "ix86_fixup_binary_operands_no_copy (, V8QImode, operands);") + +(define_expand "v8qi3" + [(set (match_operand:V8QI 0 "register_operand") +(umaxmin:V8QI + (match_operand:V8QI 1 "register_operand") + (match_operand:V8QI 2 "register_operand")))] + "TARGET_MMX_WITH_SSE" "ix86_fixup_binary_operands_no_copy (, V8QImode, operands);") (define_insn "*mmx_v8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") (umaxmin:V8QI - (match_operand:V8QI 1 "nonimmediate_operand" "%0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] - "(TARGET_SSE || TARGET_3DNOW_A) + (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (, V8QImode, operands)" - "pb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) + "@ + pb\t{%2, %0|%0, %2} + pb\t{%2, %0|%0, %2} + vpb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxadd,sseiadd,sseiadd") + (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_ashr3" [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv") -- 2.20.1
[PATCH 16/42] i386: Emulate MMX mmx_pextrw with SSE
Emulate MMX mmx_pextrw with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_pextrw): Add SSE emulation. --- gcc/config/i386/mmx.md | 18 +++--- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 43f85064cd9..22547c7da6f 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1310,16 +1310,20 @@ (set_attr "mode" "DI")]) (define_insn "mmx_pextrw" - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand" "=r,r") (zero_extend:SI (vec_select:HI - (match_operand:V4HI 1 "register_operand" "y") - (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]] - "TARGET_SSE || TARGET_3DNOW_A" - "pextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "mmxcvt") + (match_operand:V4HI 1 "register_operand" "y,Yv") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" + "@ + pextrw\t{%2, %1, %0|%0, %1, %2} + %vpextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64") + (set_attr "type" "mmxcvt,sselog1") (set_attr "length_immediate" "1") - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI")]) (define_expand "mmx_pshufw" [(match_operand:V4HI 0 "register_operand") -- 2.20.1
[PATCH 11/42] i386: Emulate MMX mmx_eq/mmx_gt<mode>3 with SSE
Emulate MMX mmx_eq/mmx_gt3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_eq3): Also allow TARGET_MMX_WITH_SSE. (*mmx_eq3): Also allow TARGET_MMX_WITH_SSE. Add SSE support. (mmx_gt3): Likewise. --- gcc/config/i386/mmx.md | 43 +- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 510d453f0fd..b0c6a8c8077 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1017,30 +1017,39 @@ (define_expand "mmx_eq3" [(set (match_operand:MMXMODEI 0 "register_operand") (eq:MMXMODEI - (match_operand:MMXMODEI 1 "nonimmediate_operand") - (match_operand:MMXMODEI 2 "nonimmediate_operand")))] - "TARGET_MMX" + (match_operand:MMXMODEI 1 "register_mmxmem_operand") + (match_operand:MMXMODEI 2 "register_mmxmem_operand")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" "ix86_fixup_binary_operands_no_copy (EQ, mode, operands);") (define_insn "*mmx_eq3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") (eq:MMXMODEI - (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX && ix86_binary_operator_ok (EQ, mode, operands)" - "pcmpeq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) + (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv") + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && ix86_binary_operator_ok (EQ, mode, operands)" + "@ + pcmpeq\t{%2, %0|%0, %2} + pcmpeq\t{%2, %0|%0, %2} + vpcmpeq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxcmp,ssecmp,ssecmp") + (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_gt3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") (gt:MMXMODEI - (match_operand:MMXMODEI 1 
"register_operand" "0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pcmpgt\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxcmp") - (set_attr "mode" "DI")]) + (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv") + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "@ + pcmpgt\t{%2, %0|%0, %2} + pcmpgt\t{%2, %0|%0, %2} + vpcmpgt\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxcmp,ssecmp,ssecmp") + (set_attr "mode" "DI,TI,TI")]) ; ;; -- 2.20.1
[PATCH 10/42] i386: Emulate MMX mmx_andnot<mode>3 with SSE
Emulate MMX mmx_andnot3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_andnot3): Also allow TARGET_MMX_WITH_SSE. Add SSE support. --- gcc/config/i386/mmx.md | 18 +++--- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 7e2d40313c3..510d453f0fd 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1049,14 +1049,18 @@ ; (define_insn "mmx_andnot3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") (and:MMXMODEI - (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0")) - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX" - "pandn\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) + (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")) + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "@ + pandn\t{%2, %0|%0, %2} + pandn\t{%2, %0|%0, %2} + vpandn\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxadd,sselog,sselog") + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_3" [(set (match_operand:MMXMODEI 0 "register_operand") -- 2.20.1
[PATCH 07/42] i386: Emulate MMX mmx_pmaddwd with SSE
Emulate MMX pmaddwd with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE. (*mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE. Add SSE support. --- gcc/config/i386/mmx.md | 25 +++-- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 3a7964d52bb..9f0311badca 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -810,11 +810,11 @@ (mult:V2SI (sign_extend:V2SI (vec_select:V2HI - (match_operand:V4HI 1 "nonimmediate_operand") + (match_operand:V4HI 1 "register_mmxmem_operand") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2SI (vec_select:V2HI - (match_operand:V4HI 2 "nonimmediate_operand") + (match_operand:V4HI 2 "register_mmxmem_operand") (parallel [(const_int 0) (const_int 2)] (mult:V2SI (sign_extend:V2SI @@ -823,20 +823,20 @@ (sign_extend:V2SI (vec_select:V2HI (match_dup 2) (parallel [(const_int 1) (const_int 3)]))] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_pmaddwd" - [(set (match_operand:V2SI 0 "register_operand" "=y") + [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv") (plus:V2SI (mult:V2SI (sign_extend:V2SI (vec_select:V2HI - (match_operand:V4HI 1 "nonimmediate_operand" "%0") + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2SI (vec_select:V2HI - (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv") (parallel [(const_int 0) (const_int 2)] (mult:V2SI (sign_extend:V2SI @@ -845,10 +845,15 @@ (sign_extend:V2SI (vec_select:V2HI (match_dup 2) (parallel [(const_int 1) (const_int 3)]))] - "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)" - "pmaddwd\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + 
&& ix86_binary_operator_ok (MULT, V4HImode, operands)" + "@ + pmaddwd\t{%2, %0|%0, %2} + pmaddwd\t{%2, %0|%0, %2} + vpmaddwd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxmul,sseiadd,sseiadd") + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_pmulhrwv4hi3" [(set (match_operand:V4HI 0 "register_operand") -- 2.20.1
[PATCH 09/42] i386: Emulate MMX <code><mode>3 with SSE
Emulate MMX 3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (any_logic:mmx_3): Also allow TARGET_MMX_WITH_SSE. (any_logic:3): New. (any_logic:*mmx_3): Also allow TARGET_MMX_WITH_SSE. Add SSE support. --- gcc/config/i386/mmx.md | 33 +++-- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 240e0188a78..7e2d40313c3 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1061,20 +1061,33 @@ (define_expand "mmx_3" [(set (match_operand:MMXMODEI 0 "register_operand") (any_logic:MMXMODEI - (match_operand:MMXMODEI 1 "nonimmediate_operand") - (match_operand:MMXMODEI 2 "nonimmediate_operand")))] - "TARGET_MMX" + (match_operand:MMXMODEI 1 "register_mmxmem_operand") + (match_operand:MMXMODEI 2 "register_mmxmem_operand")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "ix86_fixup_binary_operands_no_copy (, mode, operands);") + +(define_expand "3" + [(set (match_operand:MMXMODEI 0 "register_operand") + (any_logic:MMXMODEI + (match_operand:MMXMODEI 1 "register_operand") + (match_operand:MMXMODEI 2 "register_operand")))] + "TARGET_MMX_WITH_SSE" "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*mmx_3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") (any_logic:MMXMODEI - (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)" - "p\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) + (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv") + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && ix86_binary_operator_ok (, mode, operands)" + "@ + p\t{%2, %0|%0, %2} + p\t{%2, %0|%0, %2} + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + 
(set_attr "type" "mmxadd,sselog,sselog") + (set_attr "mode" "DI,TI,TI")]) ; ;; -- 2.20.1
[PATCH 08/42] i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE
Emulate MMX ashr3/3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_ashr3): Also allow TARGET_MMX_WITH_SSE. Add SSE emulation. (mmx_3): Likewise. (ashr3): New. (3): Likewise. --- gcc/config/i386/mmx.md | 50 ++ 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 9f0311badca..240e0188a78 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -959,32 +959,54 @@ (set_attr "mode" "DI")]) (define_insn "mmx_ashr3" - [(set (match_operand:MMXMODE24 0 "register_operand" "=y") + [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv") (ashiftrt:MMXMODE24 - (match_operand:MMXMODE24 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yN")))] - "TARGET_MMX" - "psra\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") + (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv") + (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "@ + psra\t{%2, %0|%0, %2} + psra\t{%2, %0|%0, %2} + vpsra\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxshft,sseishft,sseishft") (set (attr "length_immediate") (if_then_else (match_operand 2 "const_int_operand") (const_string "1") (const_string "0"))) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) + +(define_expand "ashr3" + [(set (match_operand:MMXMODE24 0 "register_operand") +(ashiftrt:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand") + (match_operand:DI 2 "nonmemory_operand")))] + "TARGET_MMX_WITH_SSE") (define_insn "mmx_3" - [(set (match_operand:MMXMODE248 0 "register_operand" "=y") + [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv") (any_lshift:MMXMODE248 - (match_operand:MMXMODE248 1 "register_operand" "0") - (match_operand:DI 2 "nonmemory_operand" "yN")))] - "TARGET_MMX" - "p\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") + (match_operand:MMXMODE248 1 "register_operand" 
"0,0,Yv") + (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "@ + p\t{%2, %0|%0, %2} + p\t{%2, %0|%0, %2} + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxshft,sseishft,sseishft") (set (attr "length_immediate") (if_then_else (match_operand 2 "const_int_operand") (const_string "1") (const_string "0"))) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) + +(define_expand "3" + [(set (match_operand:MMXMODE248 0 "register_operand") +(any_lshift:MMXMODE248 + (match_operand:MMXMODE248 1 "register_operand") + (match_operand:DI 2 "nonmemory_operand")))] + "TARGET_MMX_WITH_SSE") ; ;; -- 2.20.1
[PATCH 02/42] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb plus moving bits 64:95 to bits 32:63 in SSE register. Only SSE register source operand is allowed. 2019-02-08 H.J. Lu Uros Bizjak PR target/89021 * config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx): New prototype. (ix86_split_mmx_pack): Likewise. * config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New function. (ix86_split_mmx_pack): Likewise. * config/i386/i386.md (mmx_isa): New. (enabled): Also check mmx_isa. * config/i386/mmx.md (any_s_truncate): New code iterator. (s_trunsuffix): New code attr. (mmx_packsswb): Removed. (mmx_packssdw): Likewise. (mmx_packuswb): Likewise. (mmx_packswb): New define_insn_and_split to emulate MMX packsswb/packuswb with SSE2. (mmx_packssdw): Likewise. * config/i386/predicates.md (register_mmxmem_operand): New. --- gcc/config/i386/i386-protos.h | 3 ++ gcc/config/i386/i386.c| 54 gcc/config/i386/i386.md | 13 +++ gcc/config/i386/mmx.md| 67 +++ gcc/config/i386/predicates.md | 7 5 files changed, 114 insertions(+), 30 deletions(-) diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 27f5cc13abf..a53b48438ec 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -202,6 +202,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx); extern rtx ix86_split_stack_guard (void); +extern void ix86_move_vector_high_sse_to_mmx (rtx); +extern void ix86_split_mmx_pack (rtx[], enum rtx_code); + #ifdef TREE_CODE extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); #endif /* TREE_CODE */ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7d7dd80930e..d31b69d9a82 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -20221,6 +20221,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) gcc_unreachable (); } +/* Move bits 64:95 to bits 32:63. 
*/ + +void +ix86_move_vector_high_sse_to_mmx (rtx op) +{ + rtx mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (4, GEN_INT (0), GEN_INT (2), + GEN_INT (0), GEN_INT (0))); + rtx dest = lowpart_subreg (V4SImode, op, GET_MODE (op)); + op = gen_rtx_VEC_SELECT (V4SImode, dest, mask); + rtx insn = gen_rtx_SET (dest, op); + emit_insn (insn); +} + +/* Split MMX pack with signed/unsigned saturation with SSE/SSE2. */ + +void +ix86_split_mmx_pack (rtx operands[], enum rtx_code code) +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + + machine_mode dmode = GET_MODE (op0); + machine_mode smode = GET_MODE (op1); + machine_mode inner_dmode = GET_MODE_INNER (dmode); + machine_mode inner_smode = GET_MODE_INNER (smode); + + /* Get the corresponding SSE mode for destination. */ + int nunits = 16 / GET_MODE_SIZE (inner_dmode); + machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode), + nunits).require (); + machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode), +nunits / 2).require (); + + /* Get the corresponding SSE mode for source. */ + nunits = 16 / GET_MODE_SIZE (inner_smode); + machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode), + nunits).require (); + + /* Generate SSE pack with signed/unsigned saturation. */ + rtx dest = lowpart_subreg (sse_dmode, op0, GET_MODE (op0)); + op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1)); + op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2)); + + op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1); + op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2); + rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode, + op1, op2)); + emit_insn (insn); + + ix86_move_vector_high_sse_to_mmx (op0); +} + /* Helper function of ix86_fixup_binary_operands to canonicalize operand order. Returns true if the operands should be swapped. 
*/ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 40ed93dc804..e1727676deb 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -792,6 +792,10 @@ avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw" (const_string "base")) +;; Define instruction set of MMX instructions +(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" + (const_string "base")) + (define_attr "enabled" "" (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT") (eq_attr "isa" "x64_sse2") @@ -830,6 +834,15 @@ (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ") (eq_attr "isa" "avx512vl")
[PATCH 04/42] i386: Emulate MMX plusminus/sat_plusminus with SSE
Emulate MMX plusminus/sat_plusminus with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (MMXMODEI8): Require TARGET_SSE2 for V1DI. (plusminus:mmx_3): Check TARGET_MMX_WITH_SSE. (sat_plusminus:mmx_3): Likewise. (3): New. (*mmx_3): Add SSE emulation. (*mmx_3): Likewise. --- gcc/config/i386/mmx.md | 59 +++--- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 34fecd6a745..517c3283963 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -45,7 +45,7 @@ ;; 8 byte integral modes handled by MMX (and by extension, SSE) (define_mode_iterator MMXMODEI [V8QI V4HI V2SI]) -(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI]) +(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")]) ;; All 8-byte vector modes handled by MMX (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF]) @@ -663,39 +663,56 @@ (define_expand "mmx_3" [(set (match_operand:MMXMODEI8 0 "register_operand") (plusminus:MMXMODEI8 - (match_operand:MMXMODEI8 1 "nonimmediate_operand") - (match_operand:MMXMODEI8 2 "nonimmediate_operand")))] - "TARGET_MMX || (TARGET_SSE2 && mode == V1DImode)" + (match_operand:MMXMODEI8 1 "register_mmxmem_operand") + (match_operand:MMXMODEI8 2 "register_mmxmem_operand")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "ix86_fixup_binary_operands_no_copy (, mode, operands);") + +(define_expand "3" + [(set (match_operand:MMXMODEI 0 "register_operand") + (plusminus:MMXMODEI + (match_operand:MMXMODEI 1 "register_operand") + (match_operand:MMXMODEI 2 "register_operand")))] + "TARGET_MMX_WITH_SSE" "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*mmx_3" - [(set (match_operand:MMXMODEI8 0 "register_operand" "=y") + [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv") (plusminus:MMXMODEI8 - (match_operand:MMXMODEI8 1 "nonimmediate_operand" "0") - (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))] - "(TARGET_MMX || 
(TARGET_SSE2 && mode == V1DImode)) + (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "0,0,Yv") + (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (, mode, operands)" - "p\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) + "@ + p\t{%2, %0|%0, %2} + p\t{%2, %0|%0, %2} + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxadd,sseadd,sseadd") + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_3" [(set (match_operand:MMXMODE12 0 "register_operand") (sat_plusminus:MMXMODE12 - (match_operand:MMXMODE12 1 "nonimmediate_operand") - (match_operand:MMXMODE12 2 "nonimmediate_operand")))] - "TARGET_MMX" + (match_operand:MMXMODE12 1 "register_mmxmem_operand") + (match_operand:MMXMODE12 2 "register_mmxmem_operand")))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*mmx_3" - [(set (match_operand:MMXMODE12 0 "register_operand" "=y") + [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv") (sat_plusminus:MMXMODE12 - (match_operand:MMXMODE12 1 "nonimmediate_operand" "0") - (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)" - "p\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) + (match_operand:MMXMODE12 1 "register_mmxmem_operand" "0,0,Yv") + (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yv")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && ix86_binary_operator_ok (, mode, operands)" + "@ + p\t{%2, %0|%0, %2} + p\t{%2, %0|%0, %2} + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxadd,sseadd,sseadd") + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_mulv4hi3" [(set (match_operand:V4HI 0 "register_operand") -- 2.20.1
[PATCH 01/42] i386: Allow MMX register modes in SSE registers
In 64-bit mode, SSE2 can be used to emulate MMX instructions without 3DNOW. We can use SSE2 to support MMX register modes. PR target/89021 * config/i386/i386-c.c (ix86_target_macros_internal): Define __MMX_WITH_SSE__ for TARGET_MMX_WITH_SSE. * config/i386/i386.c (ix86_set_reg_reg_cost): Add support for TARGET_MMX_WITH_SSE with VALID_MMX_REG_MODE. (ix86_vector_mode_supported_p): Likewise. * config/i386/i386.h (TARGET_MMX_WITH_SSE): New. --- gcc/config/i386/i386-c.c | 2 ++ gcc/config/i386/i386.c | 5 +++-- gcc/config/i386/i386.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 5e7e46fcebe..213e1b56c6b 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -548,6 +548,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__CLDEMOTE__"); if (isa_flag2 & OPTION_MASK_ISA_PTWRITE) def_or_undef (parse_in, "__PTWRITE__"); + if (TARGET_MMX_WITH_SSE) +def_or_undef (parse_in, "__MMX_WITH_SSE__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3e5f52175d2..7d7dd80930e 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -40490,7 +40490,8 @@ ix86_set_reg_reg_cost (machine_mode mode) || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) - || (TARGET_MMX && VALID_MMX_REG_MODE (mode))) + || ((TARGET_MMX || TARGET_MMX_WITH_SSE) + && VALID_MMX_REG_MODE (mode))) units = GET_MODE_SIZE (mode); } @@ -44316,7 +44317,7 @@ ix86_vector_mode_supported_p (machine_mode mode) return true; if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) return true; - if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) + if ((TARGET_MMX ||TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode)) return true; if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) return true; diff --git a/gcc/config/i386/i386.h 
b/gcc/config/i386/i386.h index 4fd8bc40a34..91b233022c2 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -201,6 +201,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_16BIT TARGET_CODE16 #define TARGET_16BIT_P(x) TARGET_CODE16_P(x) +#define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2) + #include "config/vxworks-dummy.h" #include "config/i386/i386-opts.h" -- 2.20.1
[PATCH 00/42] V7: Emulate MMX intrinsics with SSE
On x86-64, since __m64 is returned and passed in XMM registers, we can emulate MMX intrinsics with SSE instructions. To support it, we added #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2) ;; Define instruction set of MMX instructions (define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string "base")) (eq_attr "mmx_isa" "native") (symbol_ref "!TARGET_MMX_WITH_SSE") (eq_attr "mmx_isa" "x64") (symbol_ref "TARGET_MMX_WITH_SSE") (eq_attr "mmx_isa" "x64_avx") (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX") (eq_attr "mmx_isa" "x64_noavx") (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX") We added SSE emulation to MMX patterns and disabled MMX alternatives with TARGET_MMX_WITH_SSE. Most MMX instructions have equivalent SSE versions and results of some SSE versions need to be reshuffled to the right order for MMX. There are a couple of tricky cases: 1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent. We emulate MMX maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the mask operand and handle unmapped bits 64:127 at memory address by adjusting source and mask operands together with memory address. 2. MMX movntq is emulated with SSE2 DImode movnti, which is available in 64-bit mode. 3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index. SSE emulation must clear the bit 4 in the shuffle control mask. 4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve the upper 64 bits of the destination XMM register. Tests are also added to check each SSE emulation of MMX intrinsics. There are no regressions on i686 and x86-64. For x86-64, GCC is also tested with --with-arch=native --with-cpu=native on AVX2 and AVX512F machines. H.J.
Lu (41): i386: Allow MMX register modes in SSE registers i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX i386: Emulate MMX plusminus/sat_plusminus with SSE i386: Emulate MMX mulv4hi3 with SSE i386: Emulate MMX smulv4hi3_highpart with SSE i386: Emulate MMX mmx_pmaddwd with SSE i386: Emulate MMX ashr3/3 with SSE i386: Emulate MMX 3 with SSE i386: Emulate MMX mmx_andnot3 with SSE i386: Emulate MMX mmx_eq/mmx_gt3 with SSE i386: Emulate MMX vec_dupv2si with SSE i386: Emulate MMX pshufw with SSE i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE i386: Emulate MMX sse_cvtpi2ps with SSE i386: Emulate MMX mmx_pextrw with SSE i386: Emulate MMX mmx_pinsrw with SSE i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE i386: Emulate MMX mmx_pmovmskb with SSE i386: Emulate MMX mmx_umulv4hi3_highpart with SSE i386: Emulate MMX maskmovq with SSE2 maskmovdqu i386: Emulate MMX mmx_uavgv8qi3 with SSE i386: Emulate MMX mmx_uavgv4hi3 with SSE i386: Emulate MMX mmx_psadbw with SSE i386: Emulate MMX movntq with SSE2 movntidi i386: Emulate MMX umulv1siv1di3 with SSE2 i386: Make _mm_empty () as NOP when MMX is disabled i386: Emulate MMX ssse3_phwv4hi3 with SSE i386: Emulate MMX ssse3_phdv2si3 with SSE i386: Emulate MMX ssse3_pmaddubsw with SSE i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE i386: Emulate MMX pshufb with SSE version i386: Emulate MMX ssse3_psign3 with SSE i386: Emulate MMX ssse3_palignrdi with SSE i386: Emulate MMX abs2 with SSE i386: Correct _pmulhrsw3[_mask] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE i386: Allow MMX intrinsic emulation with SSE i386: Enable TM MMX intrinsics with SSE2 i386: Add tests for MMX intrinsic emulations with SSE Uros Bizjak (1): Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE gcc/config/i386/constraints.md| 6 + gcc/config/i386/i386-builtin.def | 126 +-- gcc/config/i386/i386-c.c | 2 + 
gcc/config/i386/i386-protos.h | 4 + gcc/config/i386/i386.c| 189 +++- gcc/config/i386/i386.h| 2 + gcc/config/i386/i386.md | 17 + gcc/config/i386/mmintrin.h| 12 +- gcc/config/i386/mmx.md| 986 -- gcc/config/i386/predicates.md | 14 + gcc/config/i386/sse.md| 368 +-- gcc/config/i386/xmmintrin.h | 61 ++ gcc/testsuite/gcc.target/i386/mmx-vals.h | 77 ++ gcc/testsuite/gcc.target/i386/pr82483-1.c | 2 +- gcc/testsuite/gcc.target/i386/pr82483-2.c | 2 +- gcc/testsuite/gcc.target/i386/sse2-mmx-10.c | 43 + gcc/testsuite/gcc.target/i386/sse2-mmx-11.c | 39 + gcc/testsuite/gcc.target/i386/sse2-mmx-12.c | 42 + gcc/testsuite/gcc.target/i386/sse2-mmx-13.c | 40 + gcc/testsuite/gcc.target/i386/sse2-mmx-14.c | 31 + gcc/testsuite/gcc.target/i386/sse2-mmx-15.c | 36 + gcc/testsuite/gcc.target/i386/sse2-mmx-16.c
[PATCH 03/42] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX. For MMX punpckhXX, move bits 64:127 to bits 0:63 in SSE register. Only SSE register source operand is allowed. PR target/89021 * config/i386/i386-protos.h (ix86_split_mmx_punpck): New prototype. * config/i386/i386.c (ix86_split_mmx_punpck): New function. * config/i386/mmx.md (mmx_punpckhbw): Changed to define_insn_and_split to support SSE emulation. (mmx_punpcklbw): Likewise. (mmx_punpckhwd): Likewise. (mmx_punpcklwd): Likewise. (mmx_punpckhdq): Likewise. (mmx_punpckldq): Likewise. --- gcc/config/i386/i386-protos.h | 1 + gcc/config/i386/i386.c| 77 +++ gcc/config/i386/mmx.md| 138 ++ 3 files changed, 168 insertions(+), 48 deletions(-) diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index a53b48438ec..37581837a32 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -204,6 +204,7 @@ extern rtx ix86_split_stack_guard (void); extern void ix86_move_vector_high_sse_to_mmx (rtx); extern void ix86_split_mmx_pack (rtx[], enum rtx_code); +extern void ix86_split_mmx_punpck (rtx[], bool); #ifdef TREE_CODE extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d31b69d9a82..a76c17beece 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -20275,6 +20275,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code) ix86_move_vector_high_sse_to_mmx (op0); } +/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX. */ + +void +ix86_split_mmx_punpck (rtx operands[], bool high_p) +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + machine_mode mode = GET_MODE (op0); + rtx mask; + /* The corresponding SSE mode. 
*/ + machine_mode sse_mode, double_sse_mode; + + switch (mode) +{ +case E_V8QImode: + sse_mode = V16QImode; + double_sse_mode = V32QImode; + mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (16, + GEN_INT (0), GEN_INT (16), + GEN_INT (1), GEN_INT (17), + GEN_INT (2), GEN_INT (18), + GEN_INT (3), GEN_INT (19), + GEN_INT (4), GEN_INT (20), + GEN_INT (5), GEN_INT (21), + GEN_INT (6), GEN_INT (22), + GEN_INT (7), GEN_INT (23))); + break; + +case E_V4HImode: + sse_mode = V8HImode; + double_sse_mode = V16HImode; + mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (8, + GEN_INT (0), GEN_INT (8), + GEN_INT (1), GEN_INT (9), + GEN_INT (2), GEN_INT (10), + GEN_INT (3), GEN_INT (11))); + break; + +case E_V2SImode: + sse_mode = V4SImode; + double_sse_mode = V8SImode; + mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (4, + GEN_INT (0), GEN_INT (4), + GEN_INT (1), GEN_INT (5))); + break; + +default: + gcc_unreachable (); +} + + /* Generate SSE punpcklXX. */ + rtx dest = lowpart_subreg (sse_mode, op0, GET_MODE (op0)); + op1 = lowpart_subreg (sse_mode, op1, GET_MODE (op1)); + op2 = lowpart_subreg (sse_mode, op2, GET_MODE (op2)); + + op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2); + op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask); + rtx insn = gen_rtx_SET (dest, op2); + emit_insn (insn); + + if (high_p) +{ + /* Move bits 64:127 to bits 0:63. */ + mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (4, GEN_INT (2), GEN_INT (3), + GEN_INT (0), GEN_INT (0))); + dest = lowpart_subreg (V4SImode, dest, GET_MODE (dest)); + op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask); + insn = gen_rtx_SET (dest, op1); + emit_insn (insn); +} +} + /* Helper function of ix86_fixup_binary_operands to canonicalize operand order. Returns true if the operands should be swapped. 
*/ diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index dbb2baa74d7..34fecd6a745 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1064,87 +1064,129 @@ (set_attr "type" "mmxshft,sselog,sselog") (set_attr "mode" "DI,TI,TI")]) -(define_insn "mmx_punpckhbw" - [(set (match_operand:V8QI 0 "register_operand" "=y") +(define_insn_and_split "mmx_punpckhbw" + [(set (match_operand:V8QI 0
Re: Fortran vector math header
On Tue, Feb 05, 2019 at 01:47:57PM +0100, Martin Liška wrote: > > gcc/fortran/ChangeLog: > > 2019-01-24 Martin Liska > > * decl.c (gfc_match_gcc_builtin): Add support for filtering > of builtin directive based on multilib ABI name. > > gcc/testsuite/ChangeLog: > > 2019-01-24 Martin Liska > > * gfortran.dg/simd-builtins-7.f90: New test. > * gfortran.dg/simd-builtins-7.h: New test. The Fortran bits look ok to me. -- steve
Go patch committed: Use __builtin_dwarf_cfa for getcallersp
This patch by Cherry Zhang changes the Go compiler and runtime to use __builtin_dwarf_cfa for getcallersp. Currently, the compiler lowers runtime.getcallersp to __builtin_frame_address(1). In the C side of the runtime, getcallersp is defined as __builtin_frame_address(0). They don't match. Further, neither of them actually returns the caller's SP. On x86_64, __builtin_frame_address(0) just returns the frame pointer. __builtin_frame_address(1) returns the memory content where the frame pointer points to, which is typically the caller's frame pointer but can also be garbage if the frame pointer is not enabled. This patch changes getcallersp to use __builtin_dwarf_cfa(), which returns the caller's SP at the call site. This matches the SP we get from unwinding the stack. Currently getcallersp is not used for anything real. It will be used for precise stack scan. Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu. Committed to mainline. Ian 2019-02-15 Cherry Zhang * go-gcc.cc (Gcc_backend::Gcc_backend): Define __builtin_dwarf_cfa instead of __builtin_frame_address. Index: gcc/go/go-gcc.cc === --- gcc/go/go-gcc.cc(revision 268369) +++ gcc/go/go-gcc.cc(working copy) @@ -734,8 +734,9 @@ Gcc_backend::Gcc_backend() this->define_builtin(BUILT_IN_RETURN_ADDRESS, "__builtin_return_address", NULL, t, false, false); - // The runtime calls __builtin_frame_address for runtime.getcallersp. - this->define_builtin(BUILT_IN_FRAME_ADDRESS, "__builtin_frame_address", + // The runtime calls __builtin_dwarf_cfa for runtime.getcallersp. 
+ t = build_function_type_list(ptr_type_node, NULL_TREE); + this->define_builtin(BUILT_IN_DWARF_CFA, "__builtin_dwarf_cfa", NULL, t, false, false); // The runtime calls __builtin_extract_return_addr when recording Index: gcc/go/gofrontend/MERGE === --- gcc/go/gofrontend/MERGE (revision 268948) +++ gcc/go/gofrontend/MERGE (working copy) @@ -1,4 +1,4 @@ -1a74b8a22b2ff7f430729aa87ecb8cea7b5cdd70 +9605c2efd99aa9c744652a9153e208e0653b8596 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. Index: gcc/go/gofrontend/expressions.cc === --- gcc/go/gofrontend/expressions.cc(revision 268923) +++ gcc/go/gofrontend/expressions.cc(working copy) @@ -9903,17 +9903,18 @@ Call_expression::do_lower(Gogo* gogo, Na && n == "getcallerpc") { static Named_object* builtin_return_address; + int arg = 0; return this->lower_to_builtin(_return_address, "__builtin_return_address", - 0); + ); } else if ((this->args_ == NULL || this->args_->size() == 0) && n == "getcallersp") { - static Named_object* builtin_frame_address; - return this->lower_to_builtin(_frame_address, - "__builtin_frame_address", - 1); + static Named_object* builtin_dwarf_cfa; + return this->lower_to_builtin(_dwarf_cfa, + "__builtin_dwarf_cfa", + NULL); } } } @@ -10031,21 +10032,24 @@ Call_expression::lower_varargs(Gogo* gog this->varargs_are_lowered_ = true; } -// Return a call to __builtin_return_address or __builtin_frame_address. +// Return a call to __builtin_return_address or __builtin_dwarf_cfa. 
Expression* Call_expression::lower_to_builtin(Named_object** pno, const char* name, - int arg) + int* arg) { if (*pno == NULL) -*pno = Gogo::declare_builtin_rf_address(name); +*pno = Gogo::declare_builtin_rf_address(name, arg != NULL); Location loc = this->location(); Expression* fn = Expression::make_func_reference(*pno, NULL, loc); - Expression* a = Expression::make_integer_ul(arg, NULL, loc); Expression_list *args = new Expression_list(); - args->push_back(a); + if (arg != NULL) +{ + Expression* a = Expression::make_integer_ul(*arg, NULL, loc); + args->push_back(a); +} Expression* call = Expression::make_call(fn, args, false, loc); // The builtin functions return void*, but the Go functions return uintptr. Index: gcc/go/gofrontend/expressions.h === --- gcc/go/gofrontend/expressions.h (revision 268369) +++ gcc/go/gofrontend/expressions.h (working copy) @@ -2356,7 +2356,7 @@ class Call_expression : public Expressio
[testsuite] Couple of g++.dg/asan tweaks
One of the tests in g++.dg/asan/asan_oob_test.cc uses unaligned memory accesses and g++.dg/asan/function-argument-3.C assumes a specific calling convention for vectors. Tested on SPARC64/Linux, applied on the mainline. 2019-02-15 Eric Botcazou * g++.dg/asan/asan_oob_test.cc: Skip OOB_int on SPARC. * g++.dg/asan/function-argument-3.C: Tweak for 32-bit SPARC. -- Eric Botcazou Index: g++.dg/asan/asan_oob_test.cc === --- g++.dg/asan/asan_oob_test.cc (revision 268849) +++ g++.dg/asan/asan_oob_test.cc (working copy) @@ -68,9 +68,13 @@ TEST(AddressSanitizer, OOB_char) { OOBTest(); } +// The following test uses unaligned memory accesses + +#if !defined(__sparc__) TEST(AddressSanitizer, OOB_int) { OOBTest(); } +#endif TEST(AddressSanitizer, OOBRightTest) { for (size_t access_size = 1; access_size <= 8; access_size *= 2) { Index: g++.dg/asan/function-argument-3.C === --- g++.dg/asan/function-argument-3.C (revision 268849) +++ g++.dg/asan/function-argument-3.C (working copy) @@ -2,7 +2,16 @@ // { dg-shouldfail "asan" } // { dg-additional-options "-Wno-psabi" } +// On SPARC 32-bit, only vectors up to 8 bytes are passed in registers +#if defined(__sparc__) && !defined(__sparcv9) && !defined(__arch64__) +#define SMALL_VECTOR +#endif + +#ifdef SMALL_VECTOR +typedef int v4si __attribute__ ((vector_size (8))); +#else typedef int v4si __attribute__ ((vector_size (16))); +#endif static __attribute__ ((noinline)) int goo (v4si *a) @@ -19,10 +28,14 @@ foo (v4si arg) int main () { +#ifdef SMALL_VECTOR + v4si v = {1,2}; +#else v4si v = {1,2,3,4}; +#endif return foo (v); } // { dg-output "ERROR: AddressSanitizer: stack-buffer-overflow on address.*(\n|\r\n|\r)" } // { dg-output "READ of size . at.*" } -// { dg-output ".*'arg' \\(line 14\\) <== Memory access at offset \[0-9\]* overflows this variable.*" } +// { dg-output ".*'arg' \\(line 23\\) <== Memory access at offset \[0-9\]* overflows this variable.*" }
[SPARC] Small ASAN fixes
This automatically passes -funwind-tables when ASAN is used on Linux, as done for other architectures, and also adjusts the shadow offset in 64-bit mode. Tested on SPARC64/Linux, applied on the mainline. 2019-02-15 Eric Botcazou * config/sparc/linux.h (ASAN_CC1_SPEC): Define. (CC1_SPEC): Use GNU_USER_TARGET_CC1_SPEC and ASAN_CC1_SPEC. * config/sparc/linux64.h (ASAN_CC1_SPEC): Likewise. (CC1_SPEC): Likewise. * config/sparc/sparc.c (sparc_asan_shadow_offset): Adjust for 64-bit. -- Eric BotcazouIndex: config/sparc/linux.h === --- config/sparc/linux.h (revision 268849) +++ config/sparc/linux.h (working copy) @@ -54,10 +54,11 @@ extern const char *host_detect_local_cpu #define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS -/* This is for -profile to use -lc_p instead of -lc. */ -#undef CC1_SPEC -#define CC1_SPEC "%{profile:-p} \ -" +#undef ASAN_CC1_SPEC +#define ASAN_CC1_SPEC "%{%:sanitize(address):-funwind-tables}" + +#undef CC1_SPEC +#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC ASAN_CC1_SPEC #undef SIZE_TYPE #define SIZE_TYPE "unsigned int" Index: config/sparc/linux64.h === --- config/sparc/linux64.h (revision 268849) +++ config/sparc/linux64.h (working copy) @@ -143,24 +143,25 @@ extern const char *host_detect_local_cpu #define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS -#undef CC1_SPEC +#undef ASAN_CC1_SPEC +#define ASAN_CC1_SPEC "%{%:sanitize(address):-funwind-tables}" + +#undef CC1_SPEC #if DEFAULT_ARCH32_P -#define CC1_SPEC "%{profile:-p} \ -%{m32:%{m64:%emay not use both -m32 and -m64}} \ +#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC ASAN_CC1_SPEC \ +"%{m32:%{m64:%emay not use both -m32 and -m64}} \ %{m64:-mptr64 -mstack-bias -mlong-double-128 \ %{!mcpu*:-mcpu=ultrasparc} \ - %{!mno-vis:%{!mcpu=v9:-mvis}}} \ -" + %{!mno-vis:%{!mcpu=v9:-mvis}}}" #else -#define CC1_SPEC "%{profile:-p} \ -%{m32:%{m64:%emay not use both -m32 and -m64}} \ +#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC ASAN_CC1_SPEC \ +"%{m32:%{m64:%emay not use both -m32 and -m64}} \ %{m32:-mptr32 -mno-stack-bias 
%{!mlong-double-128:-mlong-double-64} \ %{!mcpu*:-mcpu=cypress}} \ %{mv8plus:-mptr32 -mno-stack-bias %{!mlong-double-128:-mlong-double-64} \ %{!mcpu*:-mcpu=v9}} \ %{!m32:%{!mcpu*:-mcpu=ultrasparc}} \ -%{!mno-vis:%{!m32:%{!mcpu=v9:-mvis}}} \ -" +%{!mno-vis:%{!m32:%{!mcpu=v9:-mvis}}}" #endif /* Support for a compile-time default CPU, et cetera. The rules are: Index: config/sparc/sparc.c === --- config/sparc/sparc.c (revision 268849) +++ config/sparc/sparc.c (working copy) @@ -12524,7 +12524,7 @@ sparc_init_machine_status (void) static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void) { - return TARGET_ARCH64 ? HOST_WIDE_INT_C (0x7fff8000) : (HOST_WIDE_INT_1 << 29); + return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29); } /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)
On 2/15/19 3:46 PM, Eric Botcazou wrote: I'm ready to commit the patch once it's approved, and have been since the day the problem was reported. Maybe CCing whoever approved the previous patch would help? I just pinged the patch a few minutes ago and CC'd Jason. Sorry about any trouble this has caused. Martin
Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)
> I'm ready to commit the patch once it's approved, and have been since > the day the problem was reported. Maybe CCing whoever approved the previous patch would help? -- Eric Botcazou
Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)
Ping: https://gcc.gnu.org/ml/gcc-patches/2019-02/msg00857.html Jason, since you approved the original patch, can you please also review this one? Due to the Ada test breakage there seems to be some anxiety about getting the problem corrected soon. Thanks Martin On 2/11/19 6:13 PM, Martin Sebor wrote: The attached patch removes the assumption introduced earlier today in my fix for bug 87996 that the valid_constant_size_p argument is a constant expression. I couldn't come up with a C/C++ test case where this isn't true but apparently it can happen in Ada which I inadvertently didn't build. I still haven't figured out what I have to do to build it on my Fedora 29 machine so I tested this change by hand (besides bootstrapping w/o Ada). The first set of instructions Google gives me don't seem to do it: https://fedoraproject.org/wiki/Features/Ada_developer_tools and neither does dnf install gcc-gnat as explained on our Wiki: https://gcc.gnu.org/wiki/GNAT If someone knows the magic chant I would be grateful (it might be helpful to also update the Wiki page -- the last change to it was made in 2012; I volunteer to do that). Martin
[PATCH, og8] Don't rescan "attach" node for dereferenced struct member
Hi, The following (og8 branch) patch added support for attaching/detaching from dereferenced struct members: https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01778.html Unfortunately I made a mistake in the portion of that patch that inserts new alloc and firstprivate_pointer nodes for the struct base, meaning that the node rewritten to an attach operation would be scanned again. This is both unnecessary, and can cause problems in some circumstances. Tested with offloading to nvptx, no regressions and the new test passes. I will apply (to the og8 branch) shortly. Thanks, Julian ChangeLog gcc/ * gimplify.c (gimplify_scan_omp_clauses): Avoid scanning 'c' again after creating base-pointer nodes for dereferenced struct. gcc/testsuite/ * gfortran.dg/goacc/derived-types-2.f90: New. commit e374d415801588435d62ac214e0313ffd3ef2198 Author: Julian Brown Date: Thu Feb 14 16:40:21 2019 -0800 [og8] Don't rescan "attach" node for dereferenced struct member gcc/ * gimplify.c (gimplify_scan_omp_clauses): Avoid scanning 'c' again after creating base-pointer nodes for dereferenced struct. gcc/testsuite/ * gfortran.dg/goacc/derived-types-2.f90: New. 
diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 8bf11eb659e..2ff5b68e0cc 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -8289,8 +8289,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, *list_p = c2; OMP_CLAUSE_CHAIN (c2) = c3; OMP_CLAUSE_CHAIN (c3) = c; - c = c3; - list_p = &OMP_CLAUSE_CHAIN (c3); struct_deref_set->add (decl); } diff --git a/gcc/testsuite/gfortran.dg/goacc/derived-types-2.f90 b/gcc/testsuite/gfortran.dg/goacc/derived-types-2.f90 new file mode 100644 index 000..d01583fac89 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/derived-types-2.f90 @@ -0,0 +1,14 @@ +module bar + type :: type1 + real(8), pointer, public :: p(:) => null() + end type + type :: type2 + class(type1), pointer :: p => null() + end type +end module + +subroutine foo (var) + use bar + type(type2), intent(inout) :: var + !$acc enter data create(var%p%p) +end subroutine
Re: [patch] Disable store merging in asan_expand_mark_ifn
> > OK, revised patch attached. I have manually verified that it yields the > > expected result for an array of long doubles on 64-bit SPARC. > > > > > > 2019-02-12 Eric Botcazou > > > > * asan.c (asan_expand_mark_ifn): Take into account the alignment of > > the object to pick the size of stores on strict-alignment platforms. > > Ok, thanks. Glad you insisted in the end, because I have ASAN working on SPARC64/Linux, but only after fixing another bug on 64-bit strict-alignment platforms: /* Align base if target is STRICT_ALIGNMENT. */ if (STRICT_ALIGNMENT) base = expand_binop (Pmode, and_optab, base, gen_int_mode (-((GET_MODE_ALIGNMENT (SImode) << ASAN_SHADOW_SHIFT) / BITS_PER_UNIT), Pmode), NULL_RTX, 1, OPTAB_DIRECT); GET_MODE_ALIGNMENT is unsigned int so this zero-extends to unsigned long... Tested on 32-bit and 64-bit SPARC/Linux, applied on mainline as obvious. 2019-02-15 Eric Botcazou * asan.c (asan_emit_stack_protection): Use full-sized mask to align the base address on 64-bit strict-alignment platforms. -- Eric BotcazouIndex: asan.c === --- asan.c (revision 268849) +++ asan.c (working copy) @@ -1440,13 +1441,15 @@ asan_emit_stack_protection (rtx base, rt base_align_bias = ((asan_frame_size + alignb - 1) & ~(alignb - HOST_WIDE_INT_1)) - asan_frame_size; } + /* Align base if target is STRICT_ALIGNMENT. 
*/ if (STRICT_ALIGNMENT) -base = expand_binop (Pmode, and_optab, base, - gen_int_mode (-((GET_MODE_ALIGNMENT (SImode) - << ASAN_SHADOW_SHIFT) - / BITS_PER_UNIT), Pmode), NULL_RTX, - 1, OPTAB_DIRECT); +{ + const HOST_WIDE_INT align + = (GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT) << ASAN_SHADOW_SHIFT; + base = expand_binop (Pmode, and_optab, base, gen_int_mode (-align, Pmode), + NULL_RTX, 1, OPTAB_DIRECT); +} if (use_after_return_class == -1 && pbase) emit_move_insn (pbase, base); @ -1534,7 +1548,7 @@ asan_emit_stack_protection (rtx base, rt shadow_mem = gen_rtx_MEM (SImode, shadow_base); set_mem_alias_set (shadow_mem, asan_shadow_set); if (STRICT_ALIGNMENT) -set_mem_align (shadow_mem, (GET_MODE_ALIGNMENT (SImode))); +set_mem_align (shadow_mem, GET_MODE_ALIGNMENT (SImode)); prev_offset = base_offset; asan_redzone_buffer rz_buffer (shadow_mem, prev_offset);
Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)
On 2/15/19 12:24 AM, Eric Botcazou wrote: The attached patch removes the assumption introduced earlier today in my fix for bug 87996 that the valid_constant_size_p argument is a constant expression. I couldn't come up with a C/C++ test case where this isn't true but apparently it can happen in Ada which I inadvertently didn't build. Can we do something here? Our internal testers have been down for 3 days because of this blunder... I'm ready to commit the patch once it's approved, and have been since the day the problem was reported. Martin
Go patch committed: Don't use a nil check for the write barrier
This patch to the Go frontend by Than McIntosh tweaks the recipe for generating writeBarrier loads to ensure that the dereference expr is marked as not requiring a nil check. This should fix gcc PR 89368. Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu. Committed to mainline. Ian Index: gcc/go/gofrontend/MERGE === --- gcc/go/gofrontend/MERGE (revision 268941) +++ gcc/go/gofrontend/MERGE (working copy) @@ -1,4 +1,4 @@ -0563f2d018cdb2cd685c254bac5ceb38396d0a27 +1a74b8a22b2ff7f430729aa87ecb8cea7b5cdd70 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. Index: gcc/go/gofrontend/wb.cc === --- gcc/go/gofrontend/wb.cc (revision 268923) +++ gcc/go/gofrontend/wb.cc (working copy) @@ -904,7 +904,8 @@ Gogo::check_write_barrier(Block* enclosi ref = Expression::make_unary(OPERATOR_AND, ref, loc); ref = Expression::make_cast(unsafe_pointer_type, ref, loc); ref = Expression::make_cast(puint32_type, ref, loc); - ref = Expression::make_unary(OPERATOR_MULT, ref, loc); + ref = Expression::make_dereference(ref, + Expression::NIL_CHECK_NOT_NEEDED, loc); Expression* zero = Expression::make_integer_ul(0, ref->type(), loc); Expression* cond = Expression::make_binary(OPERATOR_EQEQ, ref, zero, loc);
[PR fortran/89077, patch, part 3] - ICE using * as len specifier for character parameter
The attached patch is the third in a series for the above PR. This one fixes erroneous padding with garbage characters in some declaration and initialization expressions. The issue here was that expr->representation is set when either Hollerith strings are used or a TRANSFER statement is involved. As a result, the original string could be used with trailing garbage instead of the properly space-padded string. The patch simply clears expr->representation in that case. Regtested on x86_64-pc-linux-gnu. OK for trunk? Thanks, Harald 2019-02-15 Harald Anlauf PR fortran/89077 * decl.c (gfc_set_constant_character_len): Clear original string representation after padding has been performed to target length. 2019-02-15 Harald Anlauf PR fortran/89077 * gfortran.dg/transfer_simplify_12.f90: New test. Index: gcc/fortran/decl.c === --- gcc/fortran/decl.c (revision 268946) +++ gcc/fortran/decl.c (working copy) @@ -1754,6 +1754,14 @@ free (expr->value.character.string); expr->value.character.string = s; expr->value.character.length = len; + /* If explicit representation was given, clear it +as it is no longer needed after padding. */ + if (expr->representation.length) + { + expr->representation.length = 0; + free (expr->representation.string); + expr->representation.string = NULL; + } } } Index: gcc/testsuite/gfortran.dg/transfer_simplify_12.f90 === --- gcc/testsuite/gfortran.dg/transfer_simplify_12.f90 (nonexistent) +++ gcc/testsuite/gfortran.dg/transfer_simplify_12.f90 (working copy) @@ -0,0 +1,27 @@ +! { dg-do run } +! { dg-options "-O -std=legacy" } +! +! Test fixes for some findings while resolving PR fortran/89077 + +program test + implicit none + integer :: i + character(*) ,parameter :: s = 'abcdef' ! Length will be 6 + character(*) ,parameter :: h = 6Habcdef! Length will be 8 (Hollerith!) 
+ character(10) ,parameter :: k = 6Habcdef + character(10) ,parameter :: t = transfer (s, s) + character(10) ,save :: u = transfer (s, s) + character(10) ,parameter :: v = transfer (h, h) + character(10) ,save :: w = transfer (h, h) + character(10) ,parameter :: x = transfer ([(s(i:i),i=len(s),1,-1)], s) + character(10) ,save :: y = transfer ([(s(i:i),i=len(s),1,-1)], s) + if (len (h) /= 8) stop 1 + if (h /= s) stop 2 + if (k /= s) stop 3 + if (t /= s) stop 4 + if (u /= s) stop 5 + if (v /= s) stop 6 + if (w /= s) stop 7 + if (x /= "fedcba") stop 8 + if (y /= x) stop 9 +end program test
[PATCH] i386: Fix ')' in VALID_MMX_REG_MODE
Replace "(MODE == V1DImode)" with "(MODE) == V1DImode". * config/i386/i386.h (VALID_MMX_REG_MODE): Correct the misplaced ')'. --- gcc/ChangeLog | 5 + gcc/config/i386/i386.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d1083735e26..96f8679e8f9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2019-02-15 H.J. Lu + + * config/i386/i386.h (VALID_MMX_REG_MODE): Correct the misplaced + ')'. + 2019-02-15 Uroš Bizjak * config/i386/darwin.h (TARGET_FPMATH_DEFAULT_P): New define. diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index d9039060997..4fd8bc40a34 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1158,7 +1158,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); ((MODE) == V2SFmode || (MODE) == SFmode) #define VALID_MMX_REG_MODE(MODE) \ - ((MODE == V1DImode) || (MODE) == DImode \ + ((MODE) == V1DImode || (MODE) == DImode \ || (MODE) == V2SImode || (MODE) == SImode \ || (MODE) == V4HImode || (MODE) == V8QImode) -- 2.20.1
Re: libgo patch committed: Add S/390 support to internal/cpu package
On 15.02.19 15:52, Ian Lance Taylor wrote: > This patch by Robin Dapp adds S/390 support to the internal/cpu > package. This partially addresses PR 89123. I bootstrapped it on > x86_64-pc-linux-gnu, which means little. Committed to mainline. fails in the -m31 multilib variant with libtool: compile: /<>/build/./gcc/xgcc -B/<>/build/./gcc/ -B/usr/s390x-linux-gnu/bin/ -B/usr/s390x-linux-gnu/lib/ -isystem /usr/s390x-linux-gnu/include -isystem /usr/s390x-linux-gnu/sys-include -isys tem /<>/build/sys-include -m31 -DHAVE_CONFIG_H -I. -I../../../../src/libgo -I ../../../../src/libgo/ runtime -I../../../../src/libgo/../libffi/include -I../libffi/include -pthread -L../libatomic/.libs -fexceptions -fnon-call-exceptions -fno-stack-protector -fsplit-stack -Wall -Wextra -Wwrite-strings -Wcast-qual -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -I ../../../../src/libgo/../libgcc -I ../../../../src/libgo/../libback trace -I ../../../gcc/include -g -O2 -m31 -c ../../../../src/libgo/go/internal/cpu/cpu_gccgo.c -fPIC -DPIC -o in ternal/cpu/.libs/cpu_gccgo.o ../../../../src/libgo/go/internal/cpu/cpu_gccgo.c: Assembler messages: ../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:91: Error: Unrecognized opcode: `lghi' ../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:105: Error: Unrecognized opcode: `lghi' ../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:119: Error: Unrecognized opcode: `lghi' ../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:134: Error: Unrecognized opcode: `lghi' ../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:149: Error: Unrecognized opcode: `lghi' ../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:164: Error: Unrecognized opcode: `lghi' ../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:179: Error: Unrecognized opcode: `lghi' make[10]: *** [Makefile:2899: internal/cpu/cpu_gccgo.lo] Error 1 make[10]: *** Waiting for unfinished jobs make[10]: Leaving directory '/<>/build/s390x-linux-gnu/32/libgo' make[9]: *** [Makefile:2242: all-recursive] Error 1 make[9]: 
Leaving directory '/<>/build/s390x-linux-gnu/32/libgo' make[8]: *** [Makefile:1167: all] Error 2 make[8]: Leaving directory '/<>/build/s390x-linux-gnu/32/libgo' make[7]: *** [Makefile:3062: multi-do] Error 1 using binutils 2.32
Re: [PATCH 00/40] V6: Emulate MMX intrinsics with SSE
On Fri, Feb 15, 2019 at 7:20 PM H.J. Lu wrote: > > I went through the code again, and looks OK in general, modulo > > mmx_nonimmediate_operand issue and a couple of minor issues. > > > > Please substitute nonimmediate_operand predicate with > > mmx_nonimmediate_operand in expanders and insn patterns. Please note > > Can we keep nonimmediate_operand in expanders, like No, expander should also be changed. The way expanders are called is - if the operand can't satisfy the predicate, then move it to a register. So, for TARGET_MMX_WITH_SSE, we allow memory operand which isn't allowed by relevant insn pattern -> ICE. There is nothing RA can do here. Operand type, produced by expander must match predicate in the insn pattern to satisfy insn pattern. Otherwise, the compiler will ICE way before RA comes into play. Also, in the insn pattern, the constraints must allow a subset of an operand predicate if we want RA to fixup the operand. Uros. > (define_expand "3" > [(set (match_operand:MMXMODEI 0 "register_operand") > (plusminus:MMXMODEI > (match_operand:MMXMODEI 1 "nonimmediate_operand") > (match_operand:MMXMODEI 2 "nonimmediate_operand")))] > "TARGET_MMX_WITH_SSE" > "ix86_fixup_binary_operands_no_copy (, mode, operands);") > > (define_insn "*mmx_3" > [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv") > (plusminus:MMXMODEI8 > (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "0,0,Yv") > (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))] > "(TARGET_MMX || TARGET_MMX_WITH_SSE) >&& ix86_binary_operator_ok (, mode, operands)" > "@ >p\t{%2, %0|%0, %2} >p\t{%2, %0|%0, %2} >vp\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") >(set_attr "type" "mmxadd,sseadd,sseadd") >(set_attr "mode" "DI,TI,TI")]) > > Can RA do the right thing? > > > that the proposed convention is to name the operand > > register_mmxmem_operand (c.f. register_ssemem_operand), so I suggest > > we name the predicate in this way. 
> > I will rename it to register_mmxmem_operand. > > > There is an issue with a change to emms pattern. > > > > And let's remove _mm_empty () calls from testcases; they complicate > > things too much for no apparent benefit. > > Will do. > > > With those issues fixed, the patchset is OK for gcc-10 when it opens. > > > > Uros. > > > > > H.J. Lu (41): > > > i386: Allow MMX register modes in SSE registers > > > i386: Add mmx_nonimmediate_operand > > > i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 > > > i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX > > > i386: Emulate MMX plusminus/sat_plusminus with SSE > > > i386: Emulate MMX mulv4hi3 with SSE > > > i386: Emulate MMX smulv4hi3_highpart with SSE > > > i386: Emulate MMX mmx_pmaddwd with SSE > > > i386: Emulate MMX ashr3/3 with SSE > > > i386: Emulate MMX 3 with SSE > > > i386: Emulate MMX mmx_andnot3 with SSE > > > i386: Emulate MMX mmx_eq/mmx_gt3 with SSE > > > i386: Emulate MMX vec_dupv2si with SSE > > > i386: Emulate MMX pshufw with SSE > > > i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE > > > i386: Emulate MMX sse_cvtpi2ps with SSE > > > i386: Emulate MMX mmx_pextrw with SSE > > > i386: Emulate MMX mmx_pinsrw with SSE > > > i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE > > > i386: Emulate MMX mmx_pmovmskb with SSE > > > i386: Emulate MMX mmx_umulv4hi3_highpart with SSE > > > i386: Emulate MMX maskmovq with SSE2 maskmovdqu > > > i386: Emulate MMX mmx_uavgv8qi3 with SSE > > > i386: Emulate MMX mmx_uavgv4hi3 with SSE > > > i386: Emulate MMX mmx_psadbw with SSE > > > i386: Emulate MMX movntq with SSE2 movntidi > > > i386: Emulate MMX umulv1siv1di3 with SSE2 > > > i386: Make _mm_empty () as NOP when MMX is disabled > > > i386: Emulate MMX ssse3_phwv4hi3 with SSE > > > i386: Emulate MMX ssse3_phdv2si3 with SSE > > > i386: Emulate MMX ssse3_pmaddubsw with SSE > > > i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE > > > i386: Emulate MMX pshufb with SSE version > > > i386: Emulate MMX 
ssse3_psign3 with SSE > > > i386: Emulate MMX ssse3_palignrdi with SSE > > > i386: Emulate MMX abs2 with SSE > > > i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE > > > i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE > > > i386: Allow MMX intrinsic emulation with SSE > > > i386: Enable TM MMX intrinsics with SSE2 > > > i386: Add tests for MMX intrinsic emulations with SSE > > > > > > Uros Bizjak (1): > > > Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE > > > > > > gcc/config/i386/constraints.md| 6 + > > > gcc/config/i386/i386-builtin.def | 126 +-- > > > gcc/config/i386/i386-c.c | 2 + > > > gcc/config/i386/i386-protos.h | 4 + > > > gcc/config/i386/i386.c| 189 +++- > > > gcc/config/i386/i386.h
Re: [PATCH 00/40] V6: Emulate MMX intrinsics with SSE
On Fri, Feb 15, 2019 at 9:50 AM Uros Bizjak wrote: > > On Fri, Feb 15, 2019 at 2:58 PM H.J. Lu wrote: > > > > On x86-64, since __m64 is returned and passed in XMM registers, we can > > emulate MMX intrinsics with SSE instructions. To support it, we added > > > > #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2) > > > > ;; Define instruction set of MMX instructions > > (define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" > > (const_string "base")) > > > > (eq_attr "mmx_isa" "native") > >(symbol_ref "!TARGET_MMX_WITH_SSE") > > (eq_attr "mmx_isa" "x64") > >(symbol_ref "TARGET_MMX_WITH_SSE") > > (eq_attr "mmx_isa" "x64_avx") > >(symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX") > > (eq_attr "mmx_isa" "x64_noavx") > >(symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX") > > > > We added SSE emulation to MMX patterns and disabled MMX alternatives with > > TARGET_MMX_WITH_SSE. > > > > Most of MMX instructions have equivalent SSE versions and results of some > > SSE versions need to be reshuffled to the right order for MMX. Thee are > > couple tricky cases: > > > > 1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent. We emulate MMX > > maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the > > mask operand and handle unmapped bits 64:127 at memory address by > > adjusting source and mask operands together with memory address. > > > > 2. MMX movntq is emulated with SSE2 DImode movnti, which is available > > in 64-bit mode. > > > > 3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index. > > SSE emulation must clear the bit 4 in the shuffle control mask. > > > > 4. To emulate MMX cvtpi2p with SSE2 cvtdq2ps, we must properly preserve > > the upper 64 bits of destination XMM register. > > > > Tests are also added to check each SSE emulation of MMX intrinsics. > > > > There are no regressions on i686 and x86-64. For x86-64, GCC is also > > tested with > > > > --with-arch=native --with-cpu=native > > > > on AVX2 and AVX512F machines. 
> > I went through the code again, and looks OK in general, modulo > mmx_nonimmediate_operand issue and a couple of minor issues. > > Please substitute nonimmediate_operand predicate with > mmx_nonimmediate_operand in expanders and insn patterns. Please note Can we keep nonimmediate_operand in expanders, like (define_expand "3" [(set (match_operand:MMXMODEI 0 "register_operand") (plusminus:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand") (match_operand:MMXMODEI 2 "nonimmediate_operand")))] "TARGET_MMX_WITH_SSE" "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*mmx_3" [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv") (plusminus:MMXMODEI8 (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "0,0,Yv") (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (, mode, operands)" "@ p\t{%2, %0|%0, %2} p\t{%2, %0|%0, %2} vp\t{%2, %1, %0|%0, %1, %2}" [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") (set_attr "type" "mmxadd,sseadd,sseadd") (set_attr "mode" "DI,TI,TI")]) Can RA do the right thing? > that the proposed convention is to name the operand > register_mmxmem_operand (c.f. register_ssemem_operand), so I suggest > we name the predicate in this way. I will rename it to register_mmxmem_operand. > There is an issue with a change to emms pattern. > > And let's remove _mm_empty () calls from testcases; they complicate > things too much for no apparent benefit. Will do. > With those issues fixed, the patchset is OK for gcc-10 when it opens. > > Uros. > > > H.J. 
Lu (41): > > i386: Allow MMX register modes in SSE registers > > i386: Add mmx_nonimmediate_operand > > i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 > > i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX > > i386: Emulate MMX plusminus/sat_plusminus with SSE > > i386: Emulate MMX mulv4hi3 with SSE > > i386: Emulate MMX smulv4hi3_highpart with SSE > > i386: Emulate MMX mmx_pmaddwd with SSE > > i386: Emulate MMX ashr3/3 with SSE > > i386: Emulate MMX 3 with SSE > > i386: Emulate MMX mmx_andnot3 with SSE > > i386: Emulate MMX mmx_eq/mmx_gt3 with SSE > > i386: Emulate MMX vec_dupv2si with SSE > > i386: Emulate MMX pshufw with SSE > > i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE > > i386: Emulate MMX sse_cvtpi2ps with SSE > > i386: Emulate MMX mmx_pextrw with SSE > > i386: Emulate MMX mmx_pinsrw with SSE > > i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE > > i386: Emulate MMX mmx_pmovmskb with SSE > > i386: Emulate MMX mmx_umulv4hi3_highpart with SSE > > i386: Emulate MMX maskmovq with SSE2 maskmovdqu > > i386: Emulate MMX mmx_uavgv8qi3 with SSE > > i386: Emulate MMX mmx_uavgv4hi3 with SSE > > i386: Emulate MMX mmx_psadbw
[PATCH, i386]: Add missing TARGET_FPMATH_DEFAULT_P to darwin.h
Darwin defines its own TARGET_FPMATH_DEFAULT, which should be accompanied by corresponding TARGET_FPMATH_DEFAULT_P. Patch adds missing define. While looking around, I also fixed various whitespace issues in the header. BTW: The header file still defines TARGET_64BIT which is horribly out of date. Someone should introduce correct multilib support to Darwin to bring it in line with Linux and Solaris, so these defines could be removed in favour of generic ones in i386.h. 2019-02-15 Uroš Bizjak * config/i386/darwin.h (TARGET_FPMATH_DEFAULT_P): New define. Tested by building a crosscompiler to x86_64-apple-darwin18. Committed to mainline SVN as obvious. Uros. diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h index a63841ca5554..d8e72ec69a57 100644 --- a/gcc/config/i386/darwin.h +++ b/gcc/config/i386/darwin.h @@ -25,10 +25,10 @@ along with GCC; see the file COPYING3. If not see #undef DARWIN_X86 #define DARWIN_X86 1 -#undef TARGET_64BIT -#undef TARGET_64BIT_P +#undef TARGET_64BIT #define TARGET_64BIT TARGET_ISA_64BIT -#defineTARGET_64BIT_P(x) TARGET_ISA_64BIT_P(x) +#undef TARGET_64BIT_P +#define TARGET_64BIT_P(x) TARGET_ISA_64BIT_P(x) #ifdef IN_LIBGCC2 #undef TARGET_64BIT @@ -70,14 +70,15 @@ along with GCC; see the file COPYING3. If not see #undef TARGET_FPMATH_DEFAULT #define TARGET_FPMATH_DEFAULT (TARGET_SSE ? FPMATH_SSE : FPMATH_387) +#undef TARGET_FPMATH_DEFAULT_P +#define TARGET_FPMATH_DEFAULT_P(x) \ + (TARGET_SSE_P(x) ? FPMATH_SSE : FPMATH_387) #define TARGET_OS_CPP_BUILTINS()\ - do\ -{ \ - builtin_define ("__LITTLE_ENDIAN__"); \ - darwin_cpp_builtins (pfile); \ -} \ - while (0) + do { \ +builtin_define ("__LITTLE_ENDIAN__"); \ +darwin_cpp_builtins (pfile); \ + } while (0) #undef PTRDIFF_TYPE #define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int") @@ -121,7 +122,7 @@ extern int darwin_emit_branch_islands; than 128 bits for Darwin, but it's easier to up the alignment if it's below the minimum. 
*/ #undef PREFERRED_STACK_BOUNDARY -#define PREFERRED_STACK_BOUNDARY \ +#define PREFERRED_STACK_BOUNDARY \ MAX (128, ix86_preferred_stack_boundary) /* We want -fPIC by default, unless we're using -static to compile for @@ -179,15 +180,15 @@ extern int darwin_emit_branch_islands; and returns float values in the 387. */ #undef TARGET_SUBTARGET_DEFAULT -#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_128BIT_LONG_DOUBLE) +#define TARGET_SUBTARGET_DEFAULT \ + (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_128BIT_LONG_DOUBLE) /* For darwin we want to target specific processor features as a minimum, but these unfortunately don't correspond to a specific processor. */ #undef TARGET_SUBTARGET32_ISA_DEFAULT -#define TARGET_SUBTARGET32_ISA_DEFAULT (OPTION_MASK_ISA_MMX\ - | OPTION_MASK_ISA_SSE \ - | OPTION_MASK_ISA_SSE2 \ - | OPTION_MASK_ISA_SSE3) +#define TARGET_SUBTARGET32_ISA_DEFAULT \ + (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE \ + | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3) #undef TARGET_SUBTARGET64_ISA_DEFAULT #define TARGET_SUBTARGET64_ISA_DEFAULT TARGET_SUBTARGET32_ISA_DEFAULT @@ -209,15 +210,16 @@ extern int darwin_emit_branch_islands; #define SUBTARGET_ENCODE_SECTION_INFO darwin_encode_section_info #undef ASM_OUTPUT_ALIGN -#define ASM_OUTPUT_ALIGN(FILE,LOG) \ - do { if ((LOG) != 0) \ -{ \ - if (in_section == text_section) \ -fprintf (FILE, "\t%s %d,0x90\n", ALIGN_ASM_OP, (LOG)); \ - else \ -fprintf (FILE, "\t%s %d\n", ALIGN_ASM_OP, (LOG)); \ -} \ -} while (0) +#define ASM_OUTPUT_ALIGN(FILE,LOG)\ + do {\ +if ((LOG) != 0) \ + { \ + if (in_section == text_section)\ + fprintf (FILE, "\t%s %d,0x90\n", ALIGN_ASM_OP, (LOG)); \ + else \ + fprintf (FILE, "\t%s %d\n", ALIGN_ASM_OP, (LOG));\ + } \ + } while (0) /* Darwin x86 assemblers support the .ident directive. */ @@ -227,16 +229,16 @@ extern int darwin_emit_branch_islands; /* Darwin profiling -- call mcount. */
Re: [PATCH 00/40] V6: Emulate MMX intrinsics with SSE
On Fri, Feb 15, 2019 at 2:58 PM H.J. Lu wrote: > > On x86-64, since __m64 is returned and passed in XMM registers, we can > emulate MMX intrinsics with SSE instructions. To support it, we added > > #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2) > > ;; Define instruction set of MMX instructions > (define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" > (const_string "base")) > > (eq_attr "mmx_isa" "native") >(symbol_ref "!TARGET_MMX_WITH_SSE") > (eq_attr "mmx_isa" "x64") >(symbol_ref "TARGET_MMX_WITH_SSE") > (eq_attr "mmx_isa" "x64_avx") >(symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX") > (eq_attr "mmx_isa" "x64_noavx") >(symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX") > > We added SSE emulation to MMX patterns and disabled MMX alternatives with > TARGET_MMX_WITH_SSE. > > Most of MMX instructions have equivalent SSE versions and results of some > SSE versions need to be reshuffled to the right order for MMX. Thee are > couple tricky cases: > > 1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent. We emulate MMX > maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the > mask operand and handle unmapped bits 64:127 at memory address by > adjusting source and mask operands together with memory address. > > 2. MMX movntq is emulated with SSE2 DImode movnti, which is available > in 64-bit mode. > > 3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index. > SSE emulation must clear the bit 4 in the shuffle control mask. > > 4. To emulate MMX cvtpi2p with SSE2 cvtdq2ps, we must properly preserve > the upper 64 bits of destination XMM register. > > Tests are also added to check each SSE emulation of MMX intrinsics. > > There are no regressions on i686 and x86-64. For x86-64, GCC is also > tested with > > --with-arch=native --with-cpu=native > > on AVX2 and AVX512F machines. I went through the code again, and looks OK in general, modulo mmx_nonimmediate_operand issue and a couple of minor issues. 
Please substitute nonimmediate_operand predicate with mmx_nonimmediate_operand in expanders and insn patterns. Please note that the proposed convention is to name the operand register_mmxmem_operand (c.f. register_ssemem_operand), so I suggest we name the predicate in this way. There is an issue with a change to emms pattern. And let's remove _mm_empty () calls from testcases; they complicate things too much for no apparent benefit. With those issues fixed, the patchset is OK for gcc-10 when it opens. Uros. > H.J. Lu (41): > i386: Allow MMX register modes in SSE registers > i386: Add mmx_nonimmediate_operand > i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 > i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX > i386: Emulate MMX plusminus/sat_plusminus with SSE > i386: Emulate MMX mulv4hi3 with SSE > i386: Emulate MMX smulv4hi3_highpart with SSE > i386: Emulate MMX mmx_pmaddwd with SSE > i386: Emulate MMX ashr3/3 with SSE > i386: Emulate MMX 3 with SSE > i386: Emulate MMX mmx_andnot3 with SSE > i386: Emulate MMX mmx_eq/mmx_gt3 with SSE > i386: Emulate MMX vec_dupv2si with SSE > i386: Emulate MMX pshufw with SSE > i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE > i386: Emulate MMX sse_cvtpi2ps with SSE > i386: Emulate MMX mmx_pextrw with SSE > i386: Emulate MMX mmx_pinsrw with SSE > i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE > i386: Emulate MMX mmx_pmovmskb with SSE > i386: Emulate MMX mmx_umulv4hi3_highpart with SSE > i386: Emulate MMX maskmovq with SSE2 maskmovdqu > i386: Emulate MMX mmx_uavgv8qi3 with SSE > i386: Emulate MMX mmx_uavgv4hi3 with SSE > i386: Emulate MMX mmx_psadbw with SSE > i386: Emulate MMX movntq with SSE2 movntidi > i386: Emulate MMX umulv1siv1di3 with SSE2 > i386: Make _mm_empty () as NOP when MMX is disabled > i386: Emulate MMX ssse3_phwv4hi3 with SSE > i386: Emulate MMX ssse3_phdv2si3 with SSE > i386: Emulate MMX ssse3_pmaddubsw with SSE > i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE > i386: Emulate MMX pshufb 
with SSE version > i386: Emulate MMX ssse3_psign3 with SSE > i386: Emulate MMX ssse3_palignrdi with SSE > i386: Emulate MMX abs2 with SSE > i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE > i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE > i386: Allow MMX intrinsic emulation with SSE > i386: Enable TM MMX intrinsics with SSE2 > i386: Add tests for MMX intrinsic emulations with SSE > > Uros Bizjak (1): > Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE > > gcc/config/i386/constraints.md| 6 + > gcc/config/i386/i386-builtin.def | 126 +-- > gcc/config/i386/i386-c.c | 2 + > gcc/config/i386/i386-protos.h | 4 + > gcc/config/i386/i386.c| 189 +++- > gcc/config/i386/i386.h| 2 + >
Re: [PATCH 28/42] i386: Make _mm_empty () as NOP when MMX is disabled
On Fri, Feb 15, 2019 at 3:03 PM H.J. Lu wrote: > > With SSE emulation of MMX intrinsics, we should make _mm_empty () as NOP > when MMX is disabled. > > PR target/89021 > * config/i386/mmx.md (EMMS): Also allow TARGET_MMX_WITH_SSE. > (mmx_): Generate "" only when MMX is enabled. Better rename the pattern to "*mmx_" and introduce a new expander: (define_insn "mmx_" [(unspec_volatile [(const_int 0)] EMMS) (clobber (reg:XF ST0_REG)) (clobber (reg:XF ST1_REG)) (clobber (reg:XF ST2_REG)) (clobber (reg:XF ST3_REG)) (clobber (reg:XF ST4_REG)) (clobber (reg:XF ST5_REG)) (clobber (reg:XF ST6_REG)) (clobber (reg:XF ST7_REG)) (clobber (reg:DI MM0_REG)) (clobber (reg:DI MM1_REG)) (clobber (reg:DI MM2_REG)) (clobber (reg:DI MM3_REG)) (clobber (reg:DI MM4_REG)) (clobber (reg:DI MM5_REG)) (clobber (reg:DI MM6_REG)) (clobber (reg:DI MM7_REG))] "TARGET_MMX || TARGET_MMX_WITH_SSE" { if (!TARGET_MMX) { emit_insn (gen_nop ()); DONE; } }) This way, the compiler won't bother with {f,}emms when there are no MMX registers. Uros. > --- > gcc/config/i386/mmx.md | 6 -- > 1 file changed, 4 insertions(+), 2 deletions(-) > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index d662663a445..eaca71d5750 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -1839,7 +1839,7 @@ > (set_attr "mode" "DI")]) > > (define_int_iterator EMMS > - [(UNSPECV_EMMS "TARGET_MMX") > + [(UNSPECV_EMMS "TARGET_MMX || TARGET_MMX_WITH_SSE") > (UNSPECV_FEMMS "TARGET_3DNOW")]) > > (define_int_attr emms > @@ -1865,7 +1865,9 @@ > (clobber (reg:DI MM6_REG)) > (clobber (reg:DI MM7_REG))] >"" > - "" > +{ > + return TARGET_MMX ? "" : ""; > > +} >[(set_attr "type" "mmx") > (set_attr "modrm" "0") > (set_attr "memory" "none")]) > -- > 2.20.1 >
[Committed][PATCH][GCC][Arm] Remove alternative from neon_softfp_fp16 directive.
Hi All, There's a bit of a disconnect between the feature flags that don't test the fpu and ones that do when the test itself also forces an architecture. The forcing of the architecture would change the defaults and without explicitly giving the correct fpu again the test would fail. I don't see a good way to solve this problem; really, the feature tests should ideally contain the extra options the test adds too, but for this specific case it can be solved by always testing the fpu explicitly. Committed under the GCC obvious rule. Thanks, Tamar gcc/testsuite/ChangeLog: 2019-02-15 Tamar Christina * lib/target-supports.exp (check_effective_target_arm_neon_softfp_fp16_ok_nocache): Drop non-fpu checking alternative. -- diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 1d237d4cd664924cc580cff67a563230b3fe9571..5d8ba4436ac1ad29da57802f2465d05712c8e8e7 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -3797,7 +3797,6 @@ proc check_effective_target_arm_neon_softfp_fp16_ok_nocache { } { if { [check_effective_target_arm32] && [check_effective_target_arm_neon_ok] } { foreach flags {"-mfpu=neon-fp16 -mfloat-abi=softfp" - "-mfloat-abi=softfp -mfp16-format=ieee" "-mfpu=neon-fp16 -mfloat-abi=softfp -mfp16-format=ieee"} { if { [check_no_compiler_messages_nocache arm_neon_softfp_fp16_ok object { #include "arm_neon.h"
Re: [PATCH][DOC] Document new features for GCC 9.
On 2/14/19, David Malcolm wrote: > On Thu, 2019-02-14 at 14:19 -0700, Martin Sebor wrote: >> On 2/13/19 6:48 AM, Martin Liška wrote: >> > Hi. >> > >> > I'm sending patch where I document changes I made during GCC 9 >> > development. I would appreciate both language and factical comments >> > about the patch. >> >> Nothing technical, just a few very minor language nits/suggestions. >> >> Martin >> >> diff --git a/htdocs/gcc-9/changes.html b/htdocs/gcc-9/changes.html >> index 13243c2..9fec9e2 100644 >> --- a/htdocs/gcc-9/changes.html >> +++ b/htdocs/gcc-9/changes.html >> @@ -50,11 +50,64 @@ a work-in-progress. >> General Improvements >> >> >> -A new option -flive-patching=[inline-only-static|inline-clone] >> is >> +A new option >> -flive-patching=[inline-only-static|inline-clone] is >> >> s/is/has been/ would be better (and either a comma after option or >> a definite article without the comma). >> >> introduced to provide a safe compilation for live-patching. At >> the >> same >> time, provides multiple-level control on the enabled IPA >> optimizations. >> See the user guide for further information about the option for >> more >> -details. >> +details. > > Ideally we should add URLs any time we mention an option, linking to > the docs for that option. texinfo's HTML toolchain does give us per- > option anchors. They're not visible [1], but "View Source" shows us > that they do exist; in the form: > > https://gcc.gnu.org/onlinedocs/gcc/SOMETHING.html#indexOPTION > > though annoyingly the SOMETHING varies depending on what kind of option > it is. > > The pertinent one here is: > https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#index-flive-patching > > (FWIW, I have a patch for GCC 10 that emits terminal sequences to > "linkify" the output when diagnostics mention option names, adding a > URL to the docs for the pertinent option). > > [...snip...] 
> > Dave > > [1] I've emailed the texinfo project about this > The link for that thread is here, for reference: https://lists.gnu.org/archive/html/help-texinfo/2019-02/msg0.html
Re: [PATCH][GCC][DOC] Remove obsolete arm and aarch64 CPU names from invoke.texi
On 19/01/2019 23:37, Gerald Pfeifer wrote: > On Thu, 10 Jan 2019, Sam Tebbs wrote: >>> I believe this should also be covered in the GCC 9 release notes >>> at https://gcc.gnu.org/gcc-9/changes.html ? >> Sorry for the late reply. My email filters seem to have stumbled a bit >> so I didn't pick this up until now. Would you suggest adding something >> along the lines of "Removed obsolete Arm CPU names from the option >> documentation" (perhaps with a full list as in my original email)? > Yes, please. > > Gerald (now needing to look at his filters) Hi Gerald, I was looking into this and it seems that the CPU and architecture removals have already been documented in the Arm-specific section of the GCC 9 changes, so explicitly mentioning that the documentation has been removed as well is probably unnecessary. Sam
Re: [PATCH 02/42] i386: Add mmx_nonimmediate_operand
On Fri, Feb 15, 2019 at 2:58 PM H.J. Lu wrote: > > True if the operand is a register or an nonimmediate operand when > TARGET_MMX_WITH_SSE is false. > > PR target/89021 > * config/i386/predicates.md (mmx_nonimmediate_operand): New. > --- > gcc/config/i386/predicates.md | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md > index 99226e86436..bd1f07a28fb 100644 > --- a/gcc/config/i386/predicates.md > +++ b/gcc/config/i386/predicates.md > @@ -49,6 +49,13 @@ >(and (match_code "reg") > (match_test "MMX_REGNO_P (REGNO (op))"))) > > +;; True if the operand is a register or an nonimmediate operand when > +;; TARGET_MMX_WITH_SSE is false. > +(define_predicate "mmx_nonimmediate_operand" > + (ior (match_operand 0 "register_operand") > + (and (not (match_test "TARGET_MMX_WITH_SSE")) > + (match_operand 0 "nonimmediate_operand" Here you can use "memory_operand". I'd expect you use this new predicate universally throughout the patchset in e.g. + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,x,Yv")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && ix86_binary_operator_ok (, mode, operands)" + "@ + ... + ... + v... + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") When TARGET_MMX_WITH_SSE is true, then only the last two constraints are enabled, so we are sure that only a register operand is allowed. While RA can fix up mem->reg by itself, it is beneficial to pass this information to the compiler via predicate, and mmx_nonimmediate_operand fits there perfectly. Uros.
GCC 8.3 Status Report (2019-02-15)
Status == The GCC 8 branch is now frozen for blocking regressions and documentation fixes only; all changes to the branch now require RM approval. Quality Data Priority # Change from last report --- --- P1 0 P2 193 - 11 P3 29 + 4 P4 163 - 2 P5 24 --- --- Total P1-P3 222 - 7 Total 409 - 9 Previous Report === https://gcc.gnu.org/ml/gcc/2019-02/msg00034.html
Re: [PATCH 17/42] i386: Emulate MMX mmx_pextrw with SSE
On Fri, Feb 15, 2019 at 6:03 AM H.J. Lu wrote: > > Emulate MMX mmx_pextrw with SSE. Only SSE register source operand is > allowed. > > PR target/89021 > * config/i386/mmx.md (mmx_pextrw): Add SSE emulation. > --- > gcc/config/i386/mmx.md | 16 +--- > 1 file changed, 9 insertions(+), 7 deletions(-) > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index 3ea64e9aabe..678eaa713dc 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -1310,16 +1310,18 @@ > (set_attr "mode" "DI")]) > > (define_insn "mmx_pextrw" > - [(set (match_operand:SI 0 "register_operand" "=r") > + [(set (match_operand:SI 0 "register_operand" "=r,r") > (zero_extend:SI > (vec_select:HI > - (match_operand:V4HI 1 "register_operand" "y") > - (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]] > - "TARGET_SSE || TARGET_3DNOW_A" > - "pextrw\t{%2, %1, %0|%0, %1, %2}" > - [(set_attr "type" "mmxcvt") > + (match_operand:V4HI 1 "register_operand" "y,Yv") > + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]] > + "(TARGET_MMX || TARGET_MMX_WITH_SSE) > + && (TARGET_SSE || TARGET_3DNOW_A)" > + "%vpextrw\t{%2, %1, %0|%0, %1, %2}" > + [(set_attr "mmx_isa" "native,x64") > + (set_attr "type" "mmxcvt,sselog1") > (set_attr "length_immediate" "1") > - (set_attr "mode" "DI")]) > + (set_attr "mode" "DI,TI")]) > > (define_expand "mmx_pshufw" >[(match_operand:V4HI 0 "register_operand") > -- > 2.20.1 > Here is the updated patch for mmx_pextrw. It should be (define_insn "mmx_pextrw" [(set (match_operand:SI 0 "register_operand" "=r,r") (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y,Yv") (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)" "@ pextrw\t{%2, %1, %0|%0, %1, %2} %vpextrw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "mmx_isa" "native,x64") (set_attr "type" "mmxcvt,sselog1") (set_attr "length_immediate" "1") (set_attr "mode" "DI,TI")]) -- H.J. 
From 17bd9eb652aff70a72680f444fbb169344cf563b Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 25 Jan 2019 11:27:35 -0800 Subject: [PATCH 17/42] i386: Emulate MMX mmx_pextrw with SSE Emulate MMX mmx_pextrw with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_pextrw): Add SSE emulation. --- gcc/config/i386/mmx.md | 18 +++--- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 3ea64e9aabe..1818957f670 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1310,16 +1310,20 @@ (set_attr "mode" "DI")]) (define_insn "mmx_pextrw" - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand" "=r,r") (zero_extend:SI (vec_select:HI - (match_operand:V4HI 1 "register_operand" "y") - (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]] - "TARGET_SSE || TARGET_3DNOW_A" - "pextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "mmxcvt") + (match_operand:V4HI 1 "register_operand" "y,Yv") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" + "@ + pextrw\t{%2, %1, %0|%0, %1, %2} + %vpextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64") + (set_attr "type" "mmxcvt,sselog1") (set_attr "length_immediate" "1") - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI")]) (define_expand "mmx_pshufw" [(match_operand:V4HI 0 "register_operand") -- 2.20.1
Bugs in extended C interop
Dear Paul, I've started putting together my observations on the current status of the F2018 C interop extensions in gfortran 9.0. See the PRs 89363, 89364, 89365, 89366: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89363 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89364 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89365 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89366 Regards Reinhold smime.p7s Description: S/MIME cryptographic signature
libgo patch committed: Add S/390 support to internal/cpu package
This patch by Robin Dapp adds S/390 support to the internal/cpu package. This partially addresses PR 89123. I bootstrapped it on x86_64-pc-linux-gnu, which means little. Committed to mainline. Ian Index: gcc/go/gofrontend/MERGE === --- gcc/go/gofrontend/MERGE (revision 268940) +++ gcc/go/gofrontend/MERGE (working copy) @@ -1,4 +1,4 @@ -6877c95a5f44c3ab4f492d2000ce07771341d7b7 +0563f2d018cdb2cd685c254bac5ceb38396d0a27 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. Index: libgo/go/internal/cpu/cpu_gccgo.c === --- libgo/go/internal/cpu/cpu_gccgo.c (revision 268369) +++ libgo/go/internal/cpu/cpu_gccgo.c (working copy) @@ -70,3 +70,118 @@ struct xgetbv_ret xgetbv(void) { #pragma GCC pop_options #endif /* defined(__i386__) || defined(__x86_64__) */ + +#ifdef __s390__ + +struct facilityList { + uint64_t bits[4]; +}; + +struct queryResult { + uint64_t bits[2]; +}; + +struct facilityList stfle(void) + __asm__(GOSYM_PREFIX "internal..z2fcpu.stfle") + __attribute__((no_split_stack)); + +struct facilityList stfle(void) { +struct facilityList ret; +__asm__ ("la%%r1, %[ret]\t\n" +"lghi %%r0, 3\t\n" // last doubleword index to store +"xc0(32,%%r1), 0(%%r1)\t\n" // clear 4 doublewords (32 bytes) +".long 0xb2b01000\t\n" // store facility list extended (STFLE) +:[ret] "=Q" (ret) : : "r0", "r1", "cc"); +return ret; +} + +struct queryResult kmQuery(void) + __asm__(GOSYM_PREFIX "internal..z2fcpu.kmQuery") + __attribute__((no_split_stack)); + +struct queryResult kmQuery() { +struct queryResult ret; + +__asm__ ("lghi %%r0, 0\t\n" // set function code to 0 (KM-Query) +"la %%r1, %[ret]\t\n" +".long 0xb92e0024\t\n" // cipher message (KM) +:[ret] "=Q" (ret) : : "r0", "r1", "cc"); +return ret; +} + +struct queryResult kmcQuery(void) + __asm__(GOSYM_PREFIX "internal..z2fcpu.kmcQuery") + __attribute__((no_split_stack)); + +struct queryResult kmcQuery() { +struct queryResult ret; + +__asm__ ("lghi %%r0, 0\t\n" // set function code 
to 0 (KMC-Query) +"la %%r1, %[ret]\t\n" +".long 0xb92f0024\t\n" // cipher message with chaining (KMC) +:[ret] "=Q" (ret) : : "r0", "r1", "cc"); + +return ret; +} + +struct queryResult kmctrQuery(void) + __asm__(GOSYM_PREFIX "internal..z2fcpu.kmctrQuery") + __attribute__((no_split_stack)); + +struct queryResult kmctrQuery() { +struct queryResult ret; + +__asm__ ("lghi %%r0, 0\t\n" // set function code to 0 (KMCTR-Query) +"la %%r1, %[ret]\t\n" +".long 0xb92d4024\t\n" // cipher message with counter (KMCTR) +:[ret] "=Q" (ret) : : "r0", "r1", "cc"); + +return ret; +} + +struct queryResult kmaQuery(void) + __asm__(GOSYM_PREFIX "internal..z2fcpu.kmaQuery") + __attribute__((no_split_stack)); + +struct queryResult kmaQuery() { +struct queryResult ret; + +__asm__ ("lghi %%r0, 0\t\n" // set function code to 0 (KMA-Query) +"la %%r1, %[ret]\t\n" +".long 0xb9296024\t\n" // cipher message with authentication (KMA) +:[ret] "=Q" (ret) : : "r0", "r1", "cc"); + +return ret; +} + +struct queryResult kimdQuery(void) + __asm__(GOSYM_PREFIX "internal..z2fcpu.kimdQuery") + __attribute__((no_split_stack)); + +struct queryResult kimdQuery() { +struct queryResult ret; + +__asm__ ("lghi %%r0, 0\t\n" // set function code to 0 (KIMD-Query) +"la %%r1, %[ret]\t\n" +".long 0xb93e0024\t\n" // compute intermediate message digest (KIMD) +:[ret] "=Q" (ret) : : "r0", "r1", "cc"); + +return ret; +} + +struct queryResult klmdQuery(void) + __asm__(GOSYM_PREFIX "internal..z2fcpu.klmdQuery") + __attribute__((no_split_stack)); + +struct queryResult klmdQuery() { +struct queryResult ret; + +__asm__ ("lghi %%r0, 0\t\n" // set function code to 0 (KLMD-Query) +"la %%r1, %[ret]\t\n" +".long 0xb93f0024\t\n" // compute last message digest (KLMD) +:[ret] "=Q" (ret) : : "r0", "r1", "cc"); + +return ret; +} + +#endif /* defined(__s390__) */ Index: libgo/go/internal/cpu/cpu_s390x.go === --- libgo/go/internal/cpu/cpu_s390x.go (revision 268369) +++ libgo/go/internal/cpu/cpu_s390x.go (working copy) @@ -98,13 +98,13 @@ 
func (s *facilityList) Has(fs ...facilit // The following feature detection functions are defined in cpu_s390x.s. // They are likely to be expensive to call so the results should be cached. -func stfle() facilityList { panic("not implemented for gccgo") } -func kmQuery() queryResult{ panic("not implemented for gccgo") } -func kmcQuery()
Re: Go patch committed: Harmonize types referenced by both C and Go
On Fri, Feb 15, 2019 at 4:03 AM Rainer Orth wrote: > > Andreas Schwab writes: > > > This breaks non-split-stack builds. > > > > ../../../libgo/runtime/stack.c: In function 'doscanstack1': > > ../../../libgo/runtime/stack.c:113:18: error: passing argument 1 of > > 'scanstackblock' makes integer from pointer without a cast > > [-Werror=int-conversion] > > 113 | scanstackblock(bottom, (uintptr)(top - bottom), gcw); > > | ^~ > > | | > > | byte * {aka unsigned char *} > > I see the same on Solaris. Even with that fixed by appropriate casts to > uintptr (plus a few more times), Solaris bootstrap is still broken by > that patch: > > /vol/gcc/src/hg/trunk/local/libgo/runtime/go-varargs.c: In function > '__go_syscall6': > /vol/gcc/src/hg/trunk/local/libgo/runtime/go-varargs.c:101:10: error: > implicit declaration of function 'syscall' > [-Werror=implicit-function-declaration] > 101 | return syscall (flag, a1, a2, a3, a4, a5, a6); > | ^~~ > > This needs to include for the syscall declaration, apart > from the fundamental problem that syscall isn't a stable interface on > Solaris. I committed this patch which should fix the Solaris build. The code was already calling syscall, it was just doing it in a way that the types didn't necessarily match the C declaration. This is the implementation of Go's syscall.Syscall function, so there isn't really anything else we can do. Ian Index: gcc/go/gofrontend/MERGE === --- gcc/go/gofrontend/MERGE (revision 268939) +++ gcc/go/gofrontend/MERGE (working copy) @@ -1,4 +1,4 @@ -a9c1a76e14b66a356d3c3dfb50f1e6138e97733c +6877c95a5f44c3ab4f492d2000ce07771341d7b7 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. 
Index: libgo/runtime/go-varargs.c === --- libgo/runtime/go-varargs.c (revision 268923) +++ libgo/runtime/go-varargs.c (working copy) @@ -12,6 +12,12 @@ #include #include #include +#ifdef HAVE_SYSCALL_H +#include +#endif +#ifdef HAVE_SYS_SYSCALL_H +#include +#endif /* The syscall package calls C functions. The Go compiler can not represent a C varargs functions. On some systems it's important
[PR 89330] Avoid adding dead speculative edges to inlining heap
Hi, Martin discovered that inliner was adding deleted call graph edges to its heap when supposedly processing newly discovered direct edges. The problem is that a new edge created in the speculation part of the indirect inlining machinery created speculative edges that were immediately afterwards removed by check_speculations() after it figured out the edge is not speculation_useful_p(). The fix below avoids creating such non-speculation_useful_p edges in the first place. The edge is not useful because it cannot be inlined because the callee calls comdat local functions. I had to split can_inline_edge_p into two functions to allow performing the caller and callee checks before actually creating an edge. I think this is safe and beneficial to commit now, maybe with the exception of the newly added assert in add_new_edges_to_heap, since inlining apparently can cope with such nonsensical edges in the heap. But in that case I'd add the assert in the next stage1. Bootstrapped and tested on x86_64-linux. IIUC, Martin even LTO-bootstrapped it. OK for trunk? Thanks, Martin 2019-02-15 Martin Jambor PR ipa/89330 * ipa-inline.c (can_inline_edge_p): Move most of the checks... (call_not_inlinable_p): ...to this new function. (add_new_edges_to_heap): Assert a caller is known. * ipa-inline.h (call_not_inlinable_p): Declare. * ipa-prop.c: Include ipa-inline.h (try_make_edge_direct_virtual_call): Create speculative edges only if there is any chance of inlining them. testsuite/ * g++.dg/lto/pr89330_[01].C: New test. 
--- gcc/ipa-inline.c | 128 --- gcc/ipa-inline.h | 4 +- gcc/ipa-prop.c | 8 +- gcc/testsuite/g++.dg/lto/pr89330_0.C | 50 +++ gcc/testsuite/g++.dg/lto/pr89330_1.C | 36 5 files changed, 154 insertions(+), 72 deletions(-) create mode 100644 gcc/testsuite/g++.dg/lto/pr89330_0.C create mode 100644 gcc/testsuite/g++.dg/lto/pr89330_1.C diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 360c3de3289..ae330943571 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -299,12 +299,60 @@ sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee) (opts_for_fn (caller->decl)->x_##flag\ != opts_for_fn (callee->decl)->x_##flag) +/* Return CIF_OK if a call from CALLER to CALLEE is or would be inlineable. + Otherwise, return the reason why it cannot. EARLY should be set when + deciding about early inlining. */ + +enum cgraph_inline_failed_t +call_not_inlinable_p (cgraph_node *caller, cgraph_node *callee, + bool early) +{ + enum availability avail; + caller = caller->global.inlined_to ? caller->global.inlined_to : caller; + callee = callee->ultimate_alias_target (, caller); + + if (!callee->definition) +return CIF_BODY_NOT_AVAILABLE; + if (!early && (!opt_for_fn (callee->decl, optimize) +|| !opt_for_fn (caller->decl, optimize))) +return CIF_FUNCTION_NOT_OPTIMIZED; + else if (callee->calls_comdat_local) +return CIF_USES_COMDAT_LOCAL; + else if (avail <= AVAIL_INTERPOSABLE) +return CIF_OVERWRITABLE; + /* Don't inline if the functions have different EH personalities. */ + else if (DECL_FUNCTION_PERSONALITY (caller->decl) + && DECL_FUNCTION_PERSONALITY (callee->decl) + && (DECL_FUNCTION_PERSONALITY (caller->decl) + != DECL_FUNCTION_PERSONALITY (callee->decl))) +return CIF_EH_PERSONALITY; + /* TM pure functions should not be inlined into non-TM_pure + functions. */ + else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl)) +return CIF_UNSPECIFIED; + /* Check compatibility of target optimization options. 
*/ + else if (!targetm.target_option.can_inline_p (caller->decl, + callee->decl)) +return CIF_TARGET_OPTION_MISMATCH; + else if (ipa_fn_summaries->get (callee) == NULL + || !ipa_fn_summaries->get (callee)->inlinable) +return CIF_FUNCTION_NOT_INLINABLE; + /* Don't inline a function with mismatched sanitization attributes. */ + else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl)) +return CIF_ATTRIBUTE_MISMATCH; + else if (callee->externally_visible + && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC) +return CIF_EXTERN_LIVE_ONLY_STATIC; + return CIF_OK; +} + /* Decide if we can inline the edge and possibly update inline_failed reason. We check whether inlining is possible at all and whether caller growth limits allow doing so. - if REPORT is true, output reason to the dump file. */ + If REPORT is true, output reason to the dump file. EARLY should be set when + deciding about early inlining. */ static bool can_inline_edge_p (struct cgraph_edge *e, bool report, @@ -319,81 +367,22 @@ can_inline_edge_p (struct cgraph_edge
[PATCH 29/42] i386: Emulate MMX ssse3_phwv4hi3 with SSE
Emulate MMX ssse3_phwv4hi3 with SSE by moving bits 64:95 to bits 32:63 in SSE register. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (ssse3_phwv4hi3): Changed to define_insn_and_split to support SSE emulation. --- gcc/config/i386/sse.md | 34 ++ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f37658630dd..1c31a1fbad0 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15232,13 +15232,13 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "ssse3_phwv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") +(define_insn_and_split "ssse3_phwv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (vec_concat:V4HI (vec_concat:V2HI (ssse3_plusminus:HI (vec_select:HI - (match_operand:V4HI 1 "register_operand" "0") + (match_operand:V4HI 1 "register_operand" "0,0,Yv") (parallel [(const_int 0)])) (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) (ssse3_plusminus:HI @@ -15247,19 +15247,37 @@ (vec_concat:V2HI (ssse3_plusminus:HI (vec_select:HI - (match_operand:V4HI 2 "nonimmediate_operand" "ym") + (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv") (parallel [(const_int 0)])) (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) (ssse3_plusminus:HI (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))] - "TARGET_SSSE3" - "phw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" + "@ + phw\t{%2, %0|%0, %2} + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(const_int 0)] +{ + /* Generate SSE version of the operation. 
*/ + rtx op0 = lowpart_subreg (V8HImode, operands[0], + GET_MODE (operands[0])); + rtx op1 = lowpart_subreg (V8HImode, operands[1], + GET_MODE (operands[1])); + rtx op2 = lowpart_subreg (V8HImode, operands[2], + GET_MODE (operands[2])); + emit_insn (gen_ssse3_phwv8hi3 (op0, op1, op2)); + ix86_move_vector_high_sse_to_mmx (op0); + DONE; +} + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_insn "avx2_phdv8si3" [(set (match_operand:V8SI 0 "register_operand" "=x") -- 2.20.1
[PATCH 30/42] i386: Emulate MMX ssse3_phdv2si3 with SSE
Emulate MMX ssse3_phdv2si3 with SSE by moving bits 64:95 to bits 32:63 in SSE register. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (ssse3_phdv2si3): Changed to define_insn_and_split to support SSE emulation. --- gcc/config/i386/sse.md | 34 ++ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1c31a1fbad0..cb4a1c9fc59 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15356,26 +15356,44 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn "ssse3_phdv2si3" - [(set (match_operand:V2SI 0 "register_operand" "=y") +(define_insn_and_split "ssse3_phdv2si3" + [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv") (vec_concat:V2SI (plusminus:SI (vec_select:SI - (match_operand:V2SI 1 "register_operand" "0") + (match_operand:V2SI 1 "register_operand" "0,0,Yv") (parallel [(const_int 0)])) (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) (plusminus:SI (vec_select:SI - (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (match_operand:V2SI 2 "nonimmediate_operand" "ym,x,Yv") (parallel [(const_int 0)])) (vec_select:SI (match_dup 2) (parallel [(const_int 1)])] - "TARGET_SSSE3" - "phd\t{%2, %0|%0, %2}" - [(set_attr "type" "sseiadd") + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" + "@ + phd\t{%2, %0|%0, %2} + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(const_int 0)] +{ + /* Generate SSE version of the operation. 
*/ + rtx op0 = lowpart_subreg (V4SImode, operands[0], + GET_MODE (operands[0])); + rtx op1 = lowpart_subreg (V4SImode, operands[1], + GET_MODE (operands[1])); + rtx op2 = lowpart_subreg (V4SImode, operands[2], + GET_MODE (operands[2])); + emit_insn (gen_ssse3_phdv4si3 (op0, op1, op2)); + ix86_move_vector_high_sse_to_mmx (op0); + DONE; +} + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "sseiadd") (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_insn "avx2_pmaddubsw256" [(set (match_operand:V16HI 0 "register_operand" "=x,v") -- 2.20.1
[PATCH 10/42] i386: Emulate MMX 3 with SSE
Emulate MMX 3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (any_logic:3): New. (any_logic:*mmx_3): Also allow TARGET_MMX_WITH_SSE. Add SSE support. --- gcc/config/i386/mmx.md | 27 --- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index eef17504616..7a253005aba 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1066,15 +1066,28 @@ "TARGET_MMX" "ix86_fixup_binary_operands_no_copy (, mode, operands);") +(define_expand "3" + [(set (match_operand:MMXMODEI 0 "register_operand") + (any_logic:MMXMODEI + (match_operand:MMXMODEI 1 "nonimmediate_operand") + (match_operand:MMXMODEI 2 "nonimmediate_operand")))] + "TARGET_MMX_WITH_SSE" + "ix86_fixup_binary_operands_no_copy (, mode, operands);") + (define_insn "*mmx_3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") (any_logic:MMXMODEI - (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0") - (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))] - "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)" - "p\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxadd") - (set_attr "mode" "DI")]) + (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0,0,Yv") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,x,Yv")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && ix86_binary_operator_ok (, mode, operands)" + "@ + p\t{%2, %0|%0, %2} + p\t{%2, %0|%0, %2} + vp\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxadd,sselog,sselog") + (set_attr "mode" "DI,TI,TI")]) ; ;; -- 2.20.1
[PATCH 20/42] i386: Emulate MMX mmx_pmovmskb with SSE
Emulate MMX mmx_pmovmskb with SSE by zero-extending result of SSE pmovmskb from QImode to SImode. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_pmovmskb): Changed to define_insn_and_split to support SSE emulation. --- gcc/config/i386/mmx.md | 30 +++--- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 8833c9f091b..1adb50aa4b1 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1760,14 +1760,30 @@ [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "mmx_pmovmskb" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] +(define_insn_and_split "mmx_pmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")] UNSPEC_MOVMSK))] - "TARGET_SSE || TARGET_3DNOW_A" - "pmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" + "@ + pmovmskb\t{%1, %0|%0, %1} + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(set (match_dup 0) +(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)) + (set (match_dup 0) + (zero_extend:SI (match_dup 2)))] +{ + /* Generate SSE pmovmskb and zero-extend from QImode to SImode. */ + operands[1] = lowpart_subreg (V16QImode, operands[1], + GET_MODE (operands[1])); + operands[2] = lowpart_subreg (QImode, operands[0], + GET_MODE (operands[0])); +} + [(set_attr "mmx_isa" "native,x64") + (set_attr "type" "mmxcvt,ssemov") + (set_attr "mode" "DI,TI")]) (define_expand "mmx_maskmovq" [(set (match_operand:V8QI 0 "memory_operand") -- 2.20.1
[PATCH 38/42] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
PR target/89021 * config/i386/mmx.md (MMXMODE:mov): Also allow TARGET_MMX_WITH_SSE. (MMXMODE:*mov_internal): Likewise. (MMXMODE:movmisalign): Likewise. --- gcc/config/i386/mmx.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index eaca71d5750..c5c0c449aab 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -70,7 +70,7 @@ (define_expand "mov" [(set (match_operand:MMXMODE 0 "nonimmediate_operand") (match_operand:MMXMODE 1 "nonimmediate_operand"))] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_move (mode, operands); DONE; @@ -81,7 +81,7 @@ "=r ,o ,r,r ,m ,?!y,!y,?!y,m ,r ,?!y,v,v,v,m,r,v,!y,*x") (match_operand:MMXMODE 1 "nonimm_or_0_operand" "rCo,rC,C,rm,rC,C ,!y,m ,?!y,?!y,r ,C,v,m,v,v,r,*x,!y"))] - "TARGET_MMX + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { switch (get_attr_type (insn)) @@ -207,7 +207,7 @@ (define_expand "movmisalign" [(set (match_operand:MMXMODE 0 "nonimmediate_operand") (match_operand:MMXMODE 1 "nonimmediate_operand"))] - "TARGET_MMX" + "TARGET_MMX || TARGET_MMX_WITH_SSE" { ix86_expand_vector_move (mode, operands); DONE; -- 2.20.1
[PATCH 33/42] i386: Emulate MMX pshufb with SSE version
Emulate MMX version of pshufb with SSE version by masking out the bit 3 of the shuffle control byte. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (ssse3_pshufbv8qi3): Changed to define_insn_and_split. Also allow TARGET_MMX_WITH_SSE. Add SSE emulation. --- gcc/config/i386/sse.md | 46 +- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 2b91f8f5839..6fa9f383cd3 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15697,17 +15697,45 @@ (set_attr "btver2_decode" "vector") (set_attr "mode" "")]) -(define_insn "ssse3_pshufbv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] -UNSPEC_PSHUFB))] - "TARGET_SSSE3" - "pshufb\t{%2, %0|%0, %2}"; - [(set_attr "type" "sselog1") +(define_insn_and_split "ssse3_pshufbv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") + (match_operand:V8QI 2 "mmx_nonimmediate_operand" "ym,x,Yv")] +UNSPEC_PSHUFB)) + (clobber (match_scratch:V4SI 3 "=X,x,Yv"))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" + "@ + pshufb\t{%2, %0|%0, %2} + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(set (match_dup 3) (match_dup 5)) + (set (match_dup 3) + (and:V4SI (match_dup 3) (match_dup 2))) + (set (match_dup 0) + (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))] +{ + /* Emulate MMX version of pshufb with SSE version by masking out the + bit 3 of the shuffle control byte. 
*/ + operands[0] = lowpart_subreg (V16QImode, operands[0], + GET_MODE (operands[0])); + operands[1] = lowpart_subreg (V16QImode, operands[1], + GET_MODE (operands[1])); + operands[2] = lowpart_subreg (V4SImode, operands[2], + GET_MODE (operands[2])); + operands[4] = lowpart_subreg (V16QImode, operands[3], + GET_MODE (operands[3])); + rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7), +GEN_INT (0xf7f7f7f7), +GEN_INT (0xf7f7f7f7), +GEN_INT (0xf7f7f7f7)); + rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par); + operands[5] = force_const_mem (V4SImode, vec_const); +} + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_insn "_psign3" [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x") -- 2.20.1
[PATCH 28/42] i386: Make _mm_empty () as NOP when MMX is disabled
With SSE emulation of MMX intrinsics, we should make _mm_empty () as NOP when MMX is disabled. PR target/89021 * config/i386/mmx.md (EMMS): Also allow TARGET_MMX_WITH_SSE. (mmx_<emms>): Generate "<emms>" only when MMX is enabled. --- gcc/config/i386/mmx.md | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index d662663a445..eaca71d5750 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1839,7 +1839,7 @@ (set_attr "mode" "DI")]) (define_int_iterator EMMS - [(UNSPECV_EMMS "TARGET_MMX") + [(UNSPECV_EMMS "TARGET_MMX || TARGET_MMX_WITH_SSE") (UNSPECV_FEMMS "TARGET_3DNOW")]) (define_int_attr emms @@ -1865,7 +1865,9 @@ (clobber (reg:DI MM6_REG)) (clobber (reg:DI MM7_REG))] "" - "<emms>" +{ + return TARGET_MMX ? "<emms>" : ""; +} [(set_attr "type" "mmx") (set_attr "modrm" "0") (set_attr "memory" "none")]) -- 2.20.1
[PATCH 24/42] i386: Emulate MMX mmx_uavgv4hi3 with SSE
Emulate MMX mmx_uavgv4hi3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_uavgv4hi3): Also check TARGET_MMX and TARGET_MMX_WITH_SSE. (*mmx_uavgv4hi3): Add SSE emulation. --- gcc/config/i386/mmx.md | 22 ++ 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 0bd87ba79e8..456d1a51c50 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1739,27 +1739,33 @@ (const_vector:V4SI [(const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1] - "TARGET_SSE || TARGET_3DNOW_A" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);") (define_insn "*mmx_uavgv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (truncate:V4HI (lshiftrt:V4SI (plus:V4SI (plus:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv")) (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv"))) (const_vector:V4SI [(const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1] - "(TARGET_SSE || TARGET_3DNOW_A) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (PLUS, V4HImode, operands)" - "pavgw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) + "@ + pavgw\t{%2, %0|%0, %2} + pavgw\t{%2, %0|%0, %2} + vpavgw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxshft,sseiadd,sseiadd") + (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_psadbw" [(set (match_operand:V1DI 0 "register_operand" "=y") -- 2.20.1
[PATCH 40/42] i386: Allow MMX intrinsic emulation with SSE
Allow MMX intrinsic emulation with SSE/SSE2/SSSE3. Don't enable MMX ISA by default with TARGET_MMX_WITH_SSE. For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit mode since MMX intrinsics can be emulated with SSE. gcc/ PR target/89021 * config/i386/i386-builtin.def: Enable MMX intrinsics with SSE/SSE2/SSSE3. * config/i386/i386.c (ix86_init_mmx_sse_builtins): Likewise. (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX intrinsics with TARGET_MMX_WITH_SSE. * config/i386/mmintrin.h: Only require SSE2 if __MMX_WITH_SSE__ is defined. gcc/testsuite/ PR target/89021 * gcc.target/i386/pr82483-1.c: Error only on ia32. * gcc.target/i386/pr82483-2.c: Likewise. --- gcc/config/i386/i386-builtin.def | 126 +++--- gcc/config/i386/i386.c| 29 - gcc/config/i386/mmintrin.h| 12 ++- gcc/testsuite/gcc.target/i386/pr82483-1.c | 2 +- gcc/testsuite/gcc.target/i386/pr82483-2.c | 2 +- 5 files changed, 101 insertions(+), 70 deletions(-) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 88005f4687f..10a9d631f29 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKN BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID) /* MMX */ -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID) +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID) /* 3DNow! 
*/ BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID) @@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNO BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT) /* MMX */ -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI) - -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) - -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI) - -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI) -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int)
[PATCH 25/42] i386: Emulate MMX mmx_psadbw with SSE
Emulate MMX mmx_psadbw with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_psadbw): Add SSE emulation. --- gcc/config/i386/mmx.md | 19 --- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 456d1a51c50..8ba8ca6ea45 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1768,14 +1768,19 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_psadbw" - [(set (match_operand:V1DI 0 "register_operand" "=y") -(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0") - (match_operand:V8QI 2 "nonimmediate_operand" "ym")] + [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv") +(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") + (match_operand:V8QI 2 "nonimmediate_operand" "ym,x,Yv")] UNSPEC_PSADBW))] - "TARGET_SSE || TARGET_3DNOW_A" - "psadbw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" + "@ + psadbw\t{%2, %0|%0, %2} + psadbw\t{%2, %0|%0, %2} + vpsadbw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxshft,sseiadd,sseiadd") + (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_pmovmskb" [(set (match_operand:SI 0 "register_operand" "=r,r") -- 2.20.1
[PATCH 27/42] i386: Emulate MMX umulv1siv1di3 with SSE2
Emulate MMX umulv1siv1di3 with SSE2. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (sse2_umulv1siv1di3): Add SSE emulation support. (*sse2_umulv1siv1di3): Add SSE2 emulation. --- gcc/config/i386/mmx.md | 22 ++ 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 427a037fa62..d662663a445 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -911,24 +911,30 @@ (vec_select:V1SI (match_operand:V2SI 2 "nonimmediate_operand") (parallel [(const_int 0)])] - "TARGET_SSE2" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE2" "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);") (define_insn "*sse2_umulv1siv1di3" - [(set (match_operand:V1DI 0 "register_operand" "=y") + [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv") (mult:V1DI (zero_extend:V1DI (vec_select:V1SI - (match_operand:V2SI 1 "nonimmediate_operand" "%0") + (match_operand:V2SI 1 "nonimmediate_operand" "%0,0,Yv") (parallel [(const_int 0)]))) (zero_extend:V1DI (vec_select:V1SI - (match_operand:V2SI 2 "nonimmediate_operand" "ym") + (match_operand:V2SI 2 "nonimmediate_operand" "ym,x,Yv") (parallel [(const_int 0)])] - "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)" - "pmuludq\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && TARGET_SSE2 + && ix86_binary_operator_ok (MULT, V2SImode, operands)" + "@ + pmuludq\t{%2, %0|%0, %2} + pmuludq\t{%2, %0|%0, %2} + vpmuludq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxmul,ssemul,ssemul") + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_v4hi3" [(set (match_operand:V4HI 0 "register_operand") -- 2.20.1
[PATCH 23/42] i386: Emulate MMX mmx_uavgv8qi3 with SSE
Emulate MMX mmx_uavgv8qi3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_uavgv8qi3): Also check TARGET_MMX and TARGET_MMX_WITH_SSE. (*mmx_uavgv8qi3): Add SSE emulation. --- gcc/config/i386/mmx.md | 21 + 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 940f022464d..0bd87ba79e8 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1684,42 +1684,47 @@ (const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1] - "TARGET_SSE || TARGET_3DNOW" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);") (define_insn "*mmx_uavgv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") (truncate:V8QI (lshiftrt:V8HI (plus:V8HI (plus:V8HI (zero_extend:V8HI - (match_operand:V8QI 1 "nonimmediate_operand" "%0")) + (match_operand:V8QI 1 "nonimmediate_operand" "%0,0,Yv")) (zero_extend:V8HI - (match_operand:V8QI 2 "nonimmediate_operand" "ym"))) + (match_operand:V8QI 2 "nonimmediate_operand" "ym,x,Yv"))) (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1] - "(TARGET_SSE || TARGET_3DNOW) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (PLUS, V8QImode, operands)" { /* These two instructions have the same operation, but their encoding is different. Prefer the one that is de facto standard. 
*/ - if (TARGET_SSE || TARGET_3DNOW_A) + if (TARGET_MMX_WITH_SSE && TARGET_AVX) +return "vpavgb\t{%2, %1, %0|%0, %1, %2}"; + else if (TARGET_SSE || TARGET_3DNOW_A) return "pavgb\t{%2, %0|%0, %2}"; else return "pavgusb\t{%2, %0|%0, %2}"; } - [(set_attr "type" "mmxshft") + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxshft,sseiadd,sseiadd") (set (attr "prefix_extra") (if_then_else (not (ior (match_test "TARGET_SSE") (match_test "TARGET_3DNOW_A"))) (const_string "1") (const_string "*"))) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_uavgv4hi3" [(set (match_operand:V4HI 0 "register_operand") -- 2.20.1
[PATCH 41/42] i386: Enable TM MMX intrinsics with SSE2
This patch enables TM MMX intrinsics with SSE2 when MMX is disabled. PR target/89021 * config/i386/i386.c (bdesc_tm): Enable MMX intrinsics with SSE2. --- gcc/config/i386/i386.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 073a2534d1f..319a98f824a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -31065,13 +31065,13 @@ static const struct builtin_description bdesc_##kind[] = \ we're lazy. Add casts to make them fit. */ static const struct builtin_description bdesc_tm[] = { - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, + { OPTION_MASK_ISA_MMX | 
OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, @@ -31089,7 +31089,7 @@ static const struct builtin_description bdesc_tm[] = { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, - { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID }, + { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID }, { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID }, { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, 
"__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID }, }; -- 2.20.1
[PATCH 36/42] i386: Emulate MMX abs<mode>2 with SSE
Emulate MMX abs<mode>2 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (abs<mode>2): Add SSE emulation. --- gcc/config/i386/sse.md | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e17f395688b..0174778833a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15942,16 +15942,19 @@ }) (define_insn "abs<mode>2" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv") (abs:MMXMODEI - (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] - "TARGET_SSSE3" - "pabs<mmxvecsize>\t{%1, %0|%0, %1}"; - [(set_attr "type" "sselog1") + (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym,Yv")))] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3" + "@ + pabs<mmxvecsize>\t{%1, %0|%0, %1} + %vpabs<mmxvecsize>\t{%1, %0|%0, %1}" + [(set_attr "mmx_isa" "native,x64") + (set_attr "type" "sselog1") (set_attr "prefix_rep" "0") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI")]) ; ;; -- 2.20.1
[PATCH 32/42] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
Emulate MMX ssse3_pmulhrswv4hi3 with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/sse.md (*ssse3_pmulhrswv4hi3): Add SSE emulation. --- gcc/config/i386/sse.md | 20 +--- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f2dbb51c7fd..2b91f8f5839 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15652,25 +15652,31 @@ (set_attr "mode" "")]) (define_insn "*ssse3_pmulhrswv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (truncate:V4HI (lshiftrt:V4SI (plus:V4SI (lshiftrt:V4SI (mult:V4SI (sign_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv")) (sign_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv"))) (const_int 14)) (match_operand:V4HI 3 "const1_operand")) (const_int 1] - "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" - "pmulhrsw\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && TARGET_SSSE3 + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + pmulhrsw\t{%2, %0|%0, %2} + pmulhrsw\t{%2, %0|%0, %2} + vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "sseimul") (set_attr "prefix_extra" "1") (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI,TI")]) (define_insn "_pshufb3" [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") -- 2.20.1
[PATCH 21/42] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
Emulate MMX mmx_umulv4hi3_highpart with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_umulv4hi3_highpart): Also check TARGET_MMX and TARGET_MMX_WITH_SSE. (*mmx_umulv4hi3_highpart): Add SSE emulation. --- gcc/config/i386/mmx.md | 22 ++ 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 1adb50aa4b1..940f022464d 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -785,24 +785,30 @@ (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand"))) (const_int 16] - "TARGET_SSE || TARGET_3DNOW_A" + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_umulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (truncate:V4HI (lshiftrt:V4SI (mult:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "nonimmediate_operand" "%0")) + (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv")) (zero_extend:V4SI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) + (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv"))) (const_int 16] - "(TARGET_SSE || TARGET_3DNOW_A) + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (MULT, V4HImode, operands)" - "pmulhuw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxmul") - (set_attr "mode" "DI")]) + "@ + pmulhuw\t{%2, %0|%0, %2} + pmulhuw\t{%2, %0|%0, %2} + vpmulhuw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxmul,ssemul,ssemul") + (set_attr "mode" "DI,TI,TI")]) (define_expand "mmx_pmaddwd" [(set (match_operand:V2SI 0 "register_operand") -- 2.20.1
[PATCH 17/42] i386: Emulate MMX mmx_pextrw with SSE
Emulate MMX mmx_pextrw with SSE. Only SSE register source operand is allowed. PR target/89021 * config/i386/mmx.md (mmx_pextrw): Add SSE emulation. --- gcc/config/i386/mmx.md | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 3ea64e9aabe..678eaa713dc 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1310,16 +1310,18 @@ (set_attr "mode" "DI")]) (define_insn "mmx_pextrw" - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand" "=r,r") (zero_extend:SI (vec_select:HI - (match_operand:V4HI 1 "register_operand" "y") - (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]] - "TARGET_SSE || TARGET_3DNOW_A" - "pextrw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "mmxcvt") + (match_operand:V4HI 1 "register_operand" "y,Yv") + (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]] + "(TARGET_MMX || TARGET_MMX_WITH_SSE) + && (TARGET_SSE || TARGET_3DNOW_A)" + "%vpextrw\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mmx_isa" "native,x64") + (set_attr "type" "mmxcvt,sselog1") (set_attr "length_immediate" "1") - (set_attr "mode" "DI")]) + (set_attr "mode" "DI,TI")]) (define_expand "mmx_pshufw" [(match_operand:V4HI 0 "register_operand") -- 2.20.1