Re: libgo patch committed: Add S/390 support to internal/cpu package

2019-02-15 Thread Jakub Jelinek
On Fri, Feb 15, 2019 at 08:59:29PM +0100, Matthias Klose wrote:
> On 15.02.19 15:52, Ian Lance Taylor wrote:
> > This patch by Robin Dapp adds S/390 support to the internal/cpu
> > package.  This partially addresses PR 89123.  I bootstrapped it on
> > x86_64-pc-linux-gnu, which means little.  Committed to mainline.
> 
> fails in the -m31 multilib variant with

Indeed.  Given that there is just
libgo/go/internal/cpu/cpu_s390x.go
libgo/go/internal/cpu/cpu_s390x_test.go
(note, no s390), I think the easiest fix is:

--- libgo/go/internal/cpu/cpu_gccgo.c.jj   2019-02-16 07:57:27.882179972 +0100
+++ libgo/go/internal/cpu/cpu_gccgo.c   2019-02-16 08:36:37.241900882 +0100
@@ -71,7 +71,7 @@ struct xgetbv_ret xgetbv(void) {
 
 #endif /* defined(__i386__) || defined(__x86_64__)  */
 
-#ifdef __s390__
+#ifdef __s390x__
 
 struct facilityList {
uint64_t bits[4];
@@ -184,4 +184,4 @@ struct queryResult klmdQuery() {
 return ret;
 }
 
-#endif /* defined(__s390__)  */
+#endif /* defined(__s390x__)  */

If cpu_s390.go is ever added, this can be changed again and there can be say
#ifdef __s390x__
#define LHI "lghi"
#else
#define LHI "lhi"
#endif
and replace "lghi ... in the inline asm with LHI "...

Jakub


[PATCH] Decrease {i386,sse}.md global state by 12KB

2019-02-15 Thread Jakub Jelinek
Hi!

This is something I've noticed in a s390 change I'll post soon (where it was
even completely unnecessary), but it applies to i386 backend too.
Seems we have lots of .bss global state, 66x 64-byte and 61x 128-byte long
static buffers.  Instead of doing
  static char buf[128];
  ...
  s{,n}printf (buf, ...);
  ...
  return buf;
in the insn templates we can do:
  char buf[128];
  ...
  s{,n}printf (buf, ...);
  ...
  output_asm_insn (buf, operands);
  return "";
and avoid that way the global state.  The only problem with that is
that final.c does something in between:
1) if return from the template is NULL, not this case
2) if return from the template is "#", not this case
3) if (targetm.asm_out.unwind_emit_before_insn
&& targetm.asm_out.unwind_emit)
  targetm.asm_out.unwind_emit (asm_out_file, insn);
   while cygming.h has
#define TARGET_ASM_UNWIND_EMIT  i386_pe_seh_unwind_emit
#define TARGET_ASM_UNWIND_EMIT_BEFORE_INSN  false
   it is ok too (and other i386 subtargets don't do either,
   so unwind_emit_before_insn is true (the default) and unwind_emit
   is NULL)
4) rtx_call_insn *call_insn = dyn_cast <rtx_call_insn *> (insn);
if (call_insn != NULL)
   that is for calls only, the patch doesn't change any calls
Those 4 spots are in between get_insn_template and
output_asm_insn (templ, recog_data.operand);
which starts with:
  /* An insn may return a null string template
 in a case where no assembler code is needed.  */
  if (*templ == 0)
return;
so I think the patch doesn't make it more costly, there is just
one output_asm_insn extra call and the old one will return immediately.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-02-16  Jakub Jelinek  

* config/i386/i386.md (*movqi_internal): Remove static from
buf variable.  Use output_asm_insn (buf, operands); return "";
instead of return buf;.
* config/i386/sse.md (_andnot3,
*3, *andnot3, *andnottf3, *3,
*tf3, 3): Likewise.

--- gcc/config/i386/i386.md.jj  2019-02-12 21:48:53.183072497 +0100
+++ gcc/config/i386/i386.md 2019-02-15 23:25:36.198589133 +0100
@@ -2531,7 +2531,7 @@ (define_insn "*movqi_internal"
"Q ,R,r,n,m,q,rn, m,qn,r,k,k,k,m,C,BC"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *suffix;
 
@@ -2564,7 +2564,8 @@ (define_insn "*movqi_internal"
   suffix = (get_attr_mode (insn) == MODE_HI) ? "w" : "b";
 
   snprintf (buf, sizeof (buf), ops, suffix);
-  return buf;
+  output_asm_insn (buf, operands);
+  return "";
 
 case TYPE_MSKLOG:
   if (operands[1] == const0_rtx)
--- gcc/config/i386/sse.md.jj   2019-02-14 08:06:39.446519415 +0100
+++ gcc/config/i386/sse.md  2019-02-15 23:28:54.305366640 +0100
@@ -3198,7 +3198,7 @@ (define_insn "_andnot3"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *suffix;
 
@@ -3233,7 +3233,8 @@ (define_insn "_andnot3_andnot3_andnot3|%%0, 
%%1, %%2}",
ops, suffix);
-  return buf;
+  output_asm_insn (buf, operands);
+  return "";
 }
   [(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -3314,7 +3316,7 @@ (define_insn "*3"
   "TARGET_SSE && 
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *suffix;
 
@@ -3349,7 +3351,8 @@ (define_insn "*3"
 }
 
   snprintf (buf, sizeof (buf), ops, suffix);
-  return buf;
+  output_asm_insn (buf, operands);
+  return "";
 }
   [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
(set_attr "type" "sselog")
@@ -3378,7 +3381,7 @@ (define_insn "*3"
  (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *suffix;
 
@@ -3395,7 +3398,8 @@ (define_insn "*3"
   snprintf (buf, sizeof (buf),
   "v%s%s\t{%%2, %%1, %%0|%%0, 
%%1, %%2}",
   ops, suffix);
-  return buf;
+  output_asm_insn (buf, operands);
+  return "";
 }
   [(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -3449,7 +3453,7 @@ (define_insn "*andnot3"
(match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
   "SSE_FLOAT_MODE_P (mode)"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *suffix
 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "";
@@ -3485,7 +3489,8 @@ (define_insn "*andnot3"
 }
 
   snprintf (buf, sizeof (buf), ops, suffix);
-  return buf;
+  output_asm_insn (buf, operands);
+  return "";
 }
   [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
@@ -3516,7 +3521,7 @@ (define_insn "*andnottf3"
  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
   "TARGET_SSE"
 {
-  static char buf[128];
+  char buf[128];
   const char *ops;
   const char *tmp
 = 

[PATCH] Improve mem = STRING_CST expansion (PR rtl-optimization/66152)

2019-02-15 Thread Jakub Jelinek
Hi!

On the following testcase, we've regressed in bar since 8.x, in 8.x
store merging came up with mem = 64-bit constant, but starting with the
change to transform {0,1,2,3,4,5,6,7} char initializers into STRING_CSTs,
we don't do that anymore.  The mem = STRING_CST expansion can do that,
but only if there are no embedded zeros.  The following patch improves
it even for embedded zeros, by using a new callback for the
can_store_by_pieces/store_by_pieces calls which knows how to handle
STRING_CST.  We don't need strlen in that case, can use TREE_STRING_LENGTH
instead.  Additionally, if the STRING_CST is slightly shorter than the
destination region, it might generate better code by trying to
store_by_pieces it all in one go (bytes from STRING_CST until the last one,
followed by artificially added zeros) and only if that doesn't seem to be
beneficial (e.g. very small STRING_CST followed by kilobytes of zeros)
goes for the store_by_pieces of STRING_CST (rounded up to next
STORE_MAX_PIECES) followed by a clear_storage.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-02-16  Jakub Jelinek  

PR rtl-optimization/66152
* builtins.h (c_readstr): Declare.
* builtins.c (c_readstr): Remove forward declaration.  Add
null_terminated_p argument, if false, read all bytes from the
string instead of stopping after '\0'.
* expr.c (string_cst_read_str): New function.
(store_expr): Use string_cst_read_str instead of
builtin_strncpy_read_str.  Try to store by pieces the whole
exp_len first, and only if that fails, split it up into
store by pieces followed by clear_storage.  Formatting fix.

* gcc.target/i386/pr66152.c: New test.

--- gcc/builtins.h.jj   2019-02-14 08:06:37.878546571 +0100
+++ gcc/builtins.h  2019-02-15 11:33:50.208180171 +0100
@@ -103,6 +103,7 @@ struct c_strlen_data
 };
 
 extern tree c_strlen (tree, int, c_strlen_data * = NULL, unsigned = 1);
+extern rtx c_readstr (const char *, scalar_int_mode, bool = true);
 extern void expand_builtin_setjmp_setup (rtx, rtx);
 extern void expand_builtin_setjmp_receiver (rtx);
 extern void expand_builtin_update_setjmp_buf (rtx);
--- gcc/builtins.c.jj   2019-02-11 20:58:48.509965578 +0100
+++ gcc/builtins.c  2019-02-15 11:37:00.046029652 +0100
@@ -95,7 +95,6 @@ builtin_info_type builtin_info[(int)END_
 /* Non-zero if __builtin_constant_p should be folded right away.  */
 bool force_folding_builtin_constant_p;
 
-static rtx c_readstr (const char *, scalar_int_mode);
 static int target_char_cast (tree, char *);
 static rtx get_memory_rtx (tree, tree);
 static int apply_args_size (void);
@@ -802,10 +801,14 @@ c_strlen (tree src, int only_value, c_st
 }
 
 /* Return a constant integer corresponding to target reading
-   GET_MODE_BITSIZE (MODE) bits from string constant STR.  */
-
-static rtx
-c_readstr (const char *str, scalar_int_mode mode)
+   GET_MODE_BITSIZE (MODE) bits from string constant STR.  If
+   NULL_TERMINATED_P, reading stops after '\0' character, all further ones
+   are assumed to be zero, otherwise it reads as many characters
+   as needed.  */
+
+rtx
+c_readstr (const char *str, scalar_int_mode mode,
+  bool null_terminated_p/*=true*/)
 {
   HOST_WIDE_INT ch;
   unsigned int i, j;
@@ -830,7 +833,7 @@ c_readstr (const char *str, scalar_int_m
j = j + UNITS_PER_WORD - 2 * (j % UNITS_PER_WORD) - 1;
   j *= BITS_PER_UNIT;
 
-  if (ch)
+  if (ch || !null_terminated_p)
ch = (unsigned char) str[i];
   tmp[j / HOST_BITS_PER_WIDE_INT] |= ch << (j % HOST_BITS_PER_WIDE_INT);
 }
--- gcc/expr.c.jj   2019-02-08 20:00:40.309835608 +0100
+++ gcc/expr.c  2019-02-15 11:37:18.715719809 +0100
@@ -5453,6 +5453,30 @@ emit_storent_insn (rtx to, rtx from)
   return maybe_expand_insn (code, 2, ops);
 }
 
+/* Helper function for store_expr storing of STRING_CST.  */
+
+static rtx
+string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode)
+{
+  tree str = (tree) data;
+
+  gcc_assert (offset >= 0);
+  if (offset >= TREE_STRING_LENGTH (str))
+return const0_rtx;
+
+  if ((unsigned HOST_WIDE_INT) offset + GET_MODE_SIZE (mode)
+  > (unsigned HOST_WIDE_INT) TREE_STRING_LENGTH (str))
+{
+  char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
+  size_t l = TREE_STRING_LENGTH (str) - offset;
+  memcpy (p, TREE_STRING_POINTER (str) + offset, l);
+  memset (p + l, '\0', GET_MODE_SIZE (mode) - l);
+  return c_readstr (p, mode, false);
+}
+
+  return c_readstr (TREE_STRING_POINTER (str) + offset, mode, false);
+}
+
 /* Generate code for computing expression EXP,
and storing the value into TARGET.
 
@@ -5472,7 +5496,7 @@ emit_storent_insn (rtx to, rtx from)
 
 rtx
 store_expr (tree exp, rtx target, int call_param_p,
-   bool nontemporal, bool reverse)
+   bool nontemporal, bool reverse)
 {
   rtx temp;
   rtx alt_rtl = NULL_RTX;
@@ -5606,36 +5630,32 @@ 

[PATCH] Teach evrp that main's argc argument is always non-negative for C family (PR tree-optimization/89350)

2019-02-15 Thread Jakub Jelinek
Hi!

Both the C and C++ standard guarantee that the argc argument to main is
non-negative, the following patch sets (or adjusts) the corresponding
SSA_NAME_RANGE_INFO.  While main is just one, with IPA VRP it can also
propagate etc.  I had to change one testcase because it started optimizing
it better (the test has been folded away), so no sinking was done.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-02-16  Jakub Jelinek  

PR tree-optimization/89350
* gimple-ssa-evrp.c: Include tree-dfa.h and langhooks.h.
(maybe_set_main_argc_range): New function.
(execute_early_vrp): Call it.

* gcc.dg/tree-ssa/vrp122.c: New test.
* gcc.dg/tree-ssa/ssa-sink-3.c (main): Rename to ...
(bar): ... this.

--- gcc/gimple-ssa-evrp.c.jj   2019-01-01 12:37:15.712998659 +0100
+++ gcc/gimple-ssa-evrp.c   2019-02-15 09:49:56.768534668 +0100
@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.
 #include "tree-cfgcleanup.h"
 #include "vr-values.h"
 #include "gimple-ssa-evrp-analyze.h"
+#include "tree-dfa.h"
+#include "langhooks.h"
 
 class evrp_folder : public substitute_and_fold_engine
 {
@@ -291,6 +293,39 @@ evrp_dom_walker::cleanup (void)
   evrp_folder.vr_values->cleanup_edges_and_switches ();
 }
 
+/* argc in main in C/C++ is guaranteed to be non-negative.  Adjust the
+   range info for it.  */
+
+static void
+maybe_set_main_argc_range (void)
+{
+  if (!DECL_ARGUMENTS (current_function_decl)
+  || !(lang_GNU_C () || lang_GNU_CXX () || lang_GNU_OBJC ()))
+return;
+
+  tree argc = DECL_ARGUMENTS (current_function_decl);
+  if (TYPE_MAIN_VARIANT (TREE_TYPE (argc)) != integer_type_node)
+return;
+
+  argc = ssa_default_def (cfun, argc);
+  if (argc == NULL_TREE)
+return;
+
+  wide_int min, max;
+  value_range_kind kind = get_range_info (argc, &min, &max);
+  if (kind == VR_VARYING)
+{
+  min = wi::zero (TYPE_PRECISION (integer_type_node));
+  max = wi::to_wide (TYPE_MAX_VALUE (integer_type_node));
+}
+  else if (kind == VR_RANGE && wi::neg_p (min) && !wi::neg_p (max))
+min = wi::zero (TYPE_PRECISION (integer_type_node));
+  else
+return;
+
+  set_range_info (argc, VR_RANGE, min, max);
+}
+
 /* Main entry point for the early vrp pass which is a simplified non-iterative
version of vrp where basic blocks are visited in dominance order.  Value
ranges discovered in early vrp will also be used by ipa-vrp.  */
@@ -307,6 +342,10 @@ execute_early_vrp ()
   scev_initialize ();
   calculate_dominance_info (CDI_DOMINATORS);
 
+  /* argc in main in C/C++ is guaranteed to be non-negative.  */
+  if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
+maybe_set_main_argc_range ();
+
   /* Walk stmts in dominance order and propagate VRP.  */
   evrp_dom_walker walker;
   walker.walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
--- gcc/testsuite/gcc.dg/tree-ssa/vrp122.c.jj   2019-02-15 09:54:07.016357759 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/vrp122.c  2019-02-15 09:53:59.299486561 +0100
@@ -0,0 +1,14 @@
+/* PR tree-optimization/89350 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "link_error \\\(" "optimized" } } */
+
+extern void link_error (void);
+
+int
+main (int argc, const char *argv[])
+{
+  if (argc < 0)
+link_error ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c.jj   2015-05-29 15:03:44.947546711 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c  2019-02-16 08:04:29.951126611 +0100
@@ -2,7 +2,7 @@
 /* { dg-options "-O2 -fdump-tree-sink-stats" } */
 extern void foo(int a);
 int
-main (int argc)
+bar (int argc)
 {
   int a;
   a = argc + 1;

Jakub


Re: C++ PATCH for c++/89217 - ICE with list-initialization in range-based for loop

2019-02-15 Thread Jason Merrill

On 2/11/19 6:03 PM, Marek Polacek wrote:

On Mon, Feb 11, 2019 at 01:43:36PM -0500, Jason Merrill wrote:

On 2/7/19 6:02 PM, Marek Polacek wrote:

Since r268321 we can call digest_init even in a template, when the compound
literal isn't instantiation-dependent.


Right.  And since digest_init modifies the CONSTRUCTOR in place, that means
the template trees are digested rather than the original parse trees that we
try to use.  If we're going to use digest_init, we should probably save
another CONSTRUCTOR with the original trees.


I tried unsharing the constructor and even its contents but only then did I
realize that this cannot work.


Why wouldn't going back to saving {*((struct S *) this)->r} work?


It's not digest_init that adds the problematic
INDIRECT_REF via convert_from_reference, it's instantiate_pending_templates
-> tsubst_expr -> ... -> finish_non_static_data_member.

So the problem isn't sharing the contents of the CONSTRUCTOR, but rather what
finish_non_static_data_member does with the

   {.r=(struct R &) (struct R *) ((struct S *) this)->r}

expression.  The same problem would appear even before r268321 changes if we
called tsubst_* twice on the CONSTRUCTOR above.


Yes, it sounds like there's a bug in that path as well.  Perhaps 
tsubst_copy_and_build/COMPONENT_REF should strip a REFERENCE_REF_P if t 
was already a reference.



Do you still think digest_init and/or finish_compound_literal need tweaking?


I imagine that saving post-digest trees might cause other problems, but 
perhaps not.  Perhaps we ought to move away more generally from trying 
to save the original parse trees for non-dependent expressions and 
messing with NON_DEPENDENT_EXPR.


Jason


Re: [PATCH] v2: Fix excess warnings from -Wtype-limits with location wrappers (PR c++/88680)

2019-02-15 Thread Jason Merrill

On 2/14/19 4:20 PM, David Malcolm wrote:

On Thu, 2019-02-14 at 17:32 +0100, Jakub Jelinek wrote:

On Thu, Feb 14, 2019 at 11:26:15AM -0500, David Malcolm wrote:

There's an asymmetry in the warning; it's looking for a comparison
of a
LHS expression against an RHS constant 0, spelled as "0".

If we fold_for_warn on the RHS, then that folding introduces a
warning
for expressions that aren't spelled as "0" but can be folded to 0,
e.g., with:

enum { FOO, BAR };


So, shouldn't it be made symmetric?  Check if one argument is literal
0
before folding, and only if it is, fold_for_warn the other argument?

Jakub


The reference to symmetry in my earlier email was somewhat
misleading, sorry.

The test happens after a canonicalization of the ordering happens
here, near the top of shorten_compare:

   /* If first arg is constant, swap the args (changing operation
  so value is preserved), for canonicalization.  Don't do this if
  the second arg is 0.  */

so this already gives us symmetry.

Here's an updated version of the patch which adds the fold_for_warn in
a slightly later place, and adds a comment, and some more test cases.

Successfully bootstrapped & regtested on x86_64-pc-linux-gnu.

OK for trunk?


OK.

Jason



Re: [C++ PATCH] preview: Fix braces around scalar initializer (C++/88572) Inbox x

2019-02-15 Thread Jason Merrill

On 2/14/19 7:09 PM, will wray wrote:

Thanks Jason.
Adding this 'else if' condition afterwards seems to work:

  else if (BRACE_ENCLOSED_INITIALIZER_P (CONSTRUCTOR_ELT
(stripped_init,0)->value))
{
   if (complain & tf_error)
  error ("too many braces around scalar initializer for
type %qT", type);
   init = error_mark_node;
 }

I'll regtest that and run through the rest of the reshape logic again.


I think the first_initializer_p check should be part of this condition 
rather than the C++98 condition.

What do you think about the fact that this patch now rejects empty
brace inits like int{{}}
that was previously accepted? It's a breaking change for any code that
was incorrectly doing that.


The change makes sense to me; I would hope that such code is rare.

Jason


On Thu, Feb 14, 2019 at 6:02 PM Jason Merrill  wrote:


On 2/12/19 6:04 PM, will wray wrote:

A proposed patch for Bug 88572 is attached to the bug report along
with a short description and Change Log (a link there gives a pretty
diff of the patch):

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88572#c15

I'd appreciate any review of this patch, as well as testing on more
platforms. The patch with updated tests passes for me on x86_64.

There's also test code in bug comment #1 that demonstrates SFINAE
based on the nesting of braces. It could also be added to the
testsuite - I'm not sure how to do that or if it is needed.



+ if (cxx_dialect < cxx11 || first_initializer_p)


I would expect this to miss the error in

struct A { int i; } a = {{{42}}};

I see that we end up complaining about this in convert_like_real because
implicit_conversion catches the problem here, but I think we ought to
catch it in reshape_init_r as well.  So, also complain if the element of
the CONSTRUCTOR is also BRACE_ENCLOSED_INITIALIZER_P.

Jason




Re: [PATCH] document __builtin_is_constant_evaluated

2019-02-15 Thread Sandra Loosemore

On 2/13/19 4:33 PM, Martin Sebor wrote:


Index: gcc/doc/extend.texi
===
--- gcc/doc/extend.texi (revision 268856)
+++ gcc/doc/extend.texi (working copy)
@@ -12890,6 +12890,22 @@ built-in in this case, because it has no opportuni
 optimization.
 @end deftypefn
 
+@deftypefn {Built-in Function} bool __builtin_is_constant_evaluated ()

+The @code{__builtin_is_constant_evaluated} function is available only
+in C++.  Its main use case is to determine whether a @code{constexpr}
+function is being called in a @code{constexpr} context.  A call to
+the function evaluates to a core constant expression with the value
+@code{true} if and only if it occurs within the evaluation of an expression
+or conversion that is manifestly constant-evaluated as defined in the C++
+standard.  Manifestly constant-evaluated contexts include constant-expressions,
+the conditions of @code{constexpr if} statements, constraint-expresions, and


s/expresions/expressions/


+initializers of variables usable in constant expressions.  The built-in is
+intended to be used by implementations of the @code{std::is_constant_evaluated}
+C++ function.  Programs should make use of the latter function rather than
+invoking the built-in directly.  For more details refer to the latest revision
+of the C++ standard.
+@end deftypefn
+
 @deftypefn {Built-in Function} long __builtin_expect (long @var{exp}, long 
@var{c})
 @opindex fprofile-arcs
 You may use @code{__builtin_expect} to provide the compiler with


I think this is generally reasonable (and I agree with the rationale for 
documenting this at all), but I'd like to see this rearranged and 
rephrased to put the most important point (it's an internal hook to 
implement std::is_constant_evaluated and shouldn't be called directly) 
before the technical details, with a paragraph break in between.


-Sandra


Re: [PATCH] document __has_attribute and __has_include

2019-02-15 Thread Sandra Loosemore

On 2/13/19 2:46 PM, Martin Sebor wrote:

The attached patch adds documentation for the __has_attribute (and
__has_cpp_attribute) and __has_include operators added in r215752.


Thanks!


I was a little unsure where to add this, whether the preprocessor
manual or the GCC manual, or both.  It seems that it belongs in
the preprocessor manual but since more users read the GCC manual,
it's likely to be overlooked there.


I think the preprocessor manual is the right place.  A while back I 
brought up the idea of consolidating the preprocessor docs into the GCC 
manual but the consensus seemed to be for retaining a separate 
preprocessor manual.


My comments on this patch are mostly trivial markup things.


@@ -3422,6 +3425,99 @@ condition succeeds after the original @samp{#if} a
 @samp{#else} is allowed after any number of @samp{#elif} directives, but
 @samp{#elif} may not follow @samp{#else}.
 
+@node __has_attribute

+@subsection __has_attribute


Please use @code markup in the @subsection.


+@cindex @code{__has_attribute}
+
+The special operator @code{__has_attribute (operand)} may be used in


@code{__has_attribute (@var{operand})}


+@samp{#if} and @samp{#elif} expressions to test whether the attribute
+referenced by its argument is recognized by GCC.  Using the operator
+in other contexts is not valid.  In C code, @var{operand} must be
+a valid identifier.  In C++ code, @var{operand} may be optionally
+introduced by the @code{attribute-scope::} prefix.


I think "attribute-scope" is not a literal part of the prefix, so

@code{@var{attribute-scope}::}


+The @code{attribute-scope} prefix identifies the ``namespace'' within


And @var markup here, too.


+which the attribute is recognized.  The scope of GCC attributes is
+@samp{gnu} or @samp{__gnu__}.  The operator by itself, without any


The @code{__has_attribute} operator by itself


+@var{operand} or parentheses, acts as a predefined macro so that support
+for it can be tested in portable code.  Thus, the recommended use of
+the operator is as follows:
+
+@smallexample
+#if defined __has_attribute
+#  if __has_attribute (nonnull)
+#define ATTR_NONNULL __attribute__ ((nonnull))
+#  endif
+#endif
+@end smallexample
+
+The first @samp{#if} test succeeds only when the operator is supported
+by the version of GCC (or another compiler) being used.  Only when that
+test succeeds is it valid to use @code{__has_attribute} as a preprocessor
+operator.  As a result, combining the two tests into a single expression as
+shown below would only be valid with a compiler that supports the operator
+but not with others that don't.
+
+@smallexample
+#if defined __has_attribute && __has_attribute (nonnull)   /* not portable */
+@dots{}
+#endif
+@end smallexample
+
+@node __has_cpp_attribute
+@subsection __has_cpp_attribute


@code markup in the @subsection title, again.


+@cindex @code{__has_cpp_attribute}
+
+The special operator @code{__has_cpp_attribute (operand)} may be used


@var{operand} markup again.


+in @samp{#if} and @samp{#elif} expressions in C++ code to test whether
+the attribute referenced by its argument is recognized by GCC.
+@code{__has_cpp_attribute (operand)} is equivalent to
+@code{__has_attribute (operand)} except that when @code{operand}


The 3 instances above too.


+designates a supported standard attribute it evaluates to an integer
+constant of the form @code{YYYYMM} indicating the year and month when
+the attribute was first introduced into the C++ standard.  For additional
+information including the dates of the introduction of current standard
+attributes, see 
@w{@uref{https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations/,
+SD-6: SG10 Feature Test Recommendations}}.
+
+@node __has_include
+@subsection __has_include


@code markup in title again


+@cindex @code{__has_include}
+
+The special operator @code{__has_include (operand)} may be used in @samp{#if}

@var{operand}


+and @samp{#elif} expressions to test whether the header referenced by its
+@var{operand} can be included using the @samp{#include} directive.  Using
+the operator in other contexts is not valid.  The @var{operand} takes
+the same form as the file in the @samp{#include} directive (@xref{Include
+Syntax}) and evaluates to a nonzero value if the header can be included and
+to zero otherwise.  Note that the ability to include a header doesn't
+imply that the header doesn't contain invalid constructs or @samp{#error}
+directives that would cause the preprocessor to fail.
+
+The @code{__has_include} operator by itself, without any @var{operand} or
+parentheses, acts as a predefined macro so that support for it can be tested
+in portable code.  Thus, the recommended use of the operator is as follows:
+
+@smallexample
+#if defined __has_include
+#  if __has_include ()
+#include 
+#  endif
+#endif
+@end smallexample
+
+The first @samp{#if} test succeeds only when the operator is supported
+by the version of GCC (or another 

Re: [PATCH doc] correct/expand -Wreturn-type

2019-02-15 Thread Sandra Loosemore

On 2/6/19 11:15 AM, Martin Sebor wrote:

[snip]
But whatever.  Attached is a change with the subsentences reversed.


This version of the patch is OK.

-Sandra


Re: [PATCH doc] correct/improve -Wmissing-attributes and -Wattribute-alias

2019-02-15 Thread Sandra Loosemore

On 2/6/19 9:16 AM, Martin Sebor wrote:

The manual documents the -Wno-missing-attributes form of the option
as if it was enabled by default, even though it's enabled by -Wall
(I can't get this -Wno- convention straight in my head).  I also
got private comments on the documentation of the option suggesting
to add cross-references, and to list the attributes
-Wattribute-alias considers (the same ones as -Wmissing-attributes).

The attached patch makes these changes.


I found the discussion of both options incomprehensible even with this 
patch.  :-(  The defaults are incorrect, there are typos, awkward 
wording and confusing paragraph organization, etc.  So I consulted the 
sources and came up with the attached alternative patch.  Can you review 
this for correctness and generally making sense?


-Sandra
2019-02-15  Sandra Loosemore  
	Martin Sebor  

	gcc/
	* c-family/c.opt (Wmissing-attributes): Clean up doc string.
	* common.opt (Wattribute-alias): Likewise.
	* doc/invoke.texi (Option Summary): List general form of
	-Wattribute-alias=.  List positive form of -Wmissing-attributes.
	(-Wmissing-attributes): Invert entry, rewrite and correct default.
	Add cross-references.
	(-Wattribute-alias): Rewrite and correct default.  Mention
	considered attributes (same as for -Wmissing-attributes).
Index: gcc/c-family/c.opt
===
--- gcc/c-family/c.opt	(revision 268948)
+++ gcc/c-family/c.opt	(working copy)
@@ -818,7 +818,7 @@ Warn on primary template declaration.
 Wmissing-attributes
 C ObjC C++ ObjC++ Var(warn_missing_attributes) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
 Warn about declarations of entities that may be missing attributes
-that related entities have been declared with it.
+that related entities have been declared with.
 
 Wmissing-format-attribute
 C ObjC C++ ObjC++ Warning Alias(Wsuggest-attribute=format)
Index: gcc/common.opt
===
--- gcc/common.opt	(revision 268948)
+++ gcc/common.opt	(working copy)
@@ -552,11 +552,11 @@ Warn about inappropriate attribute usage
 
 Wattribute-alias
 Common Alias(Wattribute_alias=, 1, 0) Warning
-Warn about type safety and similar errors and mismatches in attribute alias and related.
+Warn about type safety and similar errors and mismatches in declarations with alias attributes.
 
 Wattribute-alias=
 Common Joined RejectNegative UInteger Var(warn_attribute_alias) Init(1) Warning IntegerRange(0, 2)
-Warn about type safety and similar errors and mismatches in attribute alias and related.
+Warn about type safety and similar errors and mismatches in declarations with alias attributes.
 
 Wcannot-profile
 Common Var(warn_cannot_profile) Init(1) Warning
Index: gcc/doc/invoke.texi
===
--- gcc/doc/invoke.texi	(revision 268948)
+++ gcc/doc/invoke.texi	(working copy)
@@ -288,7 +288,7 @@ Objective-C and Objective-C++ Dialects}.
 -Walloc-zero  -Walloc-size-larger-than=@var{byte-size} @gol
 -Walloca  -Walloca-larger-than=@var{byte-size} @gol
 -Wno-aggressive-loop-optimizations  -Warray-bounds  -Warray-bounds=@var{n} @gol
--Wno-attributes  -Wno-attribute-alias @gol
+-Wno-attributes  -Wattribute-alias=@var{n}  @gol
 -Wbool-compare  -Wbool-operation @gol
 -Wno-builtin-declaration-mismatch @gol
 -Wno-builtin-macro-redefined  -Wc90-c99-compat  -Wc99-c11-compat @gol
@@ -322,7 +322,7 @@ Objective-C and Objective-C++ Dialects}.
 -Winvalid-pch  -Wlarger-than=@var{byte-size} @gol
 -Wlogical-op  -Wlogical-not-parentheses  -Wlong-long @gol
 -Wmain  -Wmaybe-uninitialized  -Wmemset-elt-size  -Wmemset-transposed-args @gol
--Wmisleading-indentation  -Wno-missing-attributes  -Wmissing-braces @gol
+-Wmisleading-indentation  -Wmissing-attributes  -Wmissing-braces @gol
 -Wmissing-field-initializers  -Wmissing-format-attribute @gol
 -Wmissing-include-dirs  -Wmissing-noreturn  -Wmissing-profile @gol
 -Wno-multichar  -Wmultistatement-macros  -Wnonnull  -Wnonnull-compare @gol
@@ -5056,7 +5056,7 @@ about the layout of the file that the di
 
 This warning is enabled by @option{-Wall} in C and C++.
 
-@item -Wno-missing-attributes
+@item -Wmissing-attributes
 @opindex Wmissing-attributes
 @opindex Wno-missing-attributes
 Warn when a declaration of a function is missing one or more attributes
@@ -5064,10 +5064,10 @@ that a related function is declared with
 affect the correctness or efficiency of generated code.  For example,
 the warning is issued for declarations of aliases that use attributes
 to specify less restrictive requirements than those of their targets.
-This typically represents a potential optimization oportunity rather
-than a hidden bug.  The @option{-Wattribute-alias} option controls warnings
-issued for mismatches between declarations of aliases and their targets
-that might be indicative of code generation bugs.
+This typically represents a potential optimization opportunity.
+By contrast, 

[PATCH 32/42] i386: Emulate MMX pshufb with SSE version

2019-02-15 Thread H.J. Lu
Emulate MMX version of pshufb with SSE version by masking out the bit 3
of the shuffle control byte.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/sse.md (ssse3_pshufbv8qi3): Changed to
define_insn_and_split.  Also allow TARGET_MMX_WITH_SSE.  Add
SSE emulation.
---
 gcc/config/i386/sse.md | 46 +-
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 80b1a46f507..704e211c0b8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15697,17 +15697,45 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "")])
 
-(define_insn "ssse3_pshufbv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-   (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
-UNSPEC_PSHUFB))]
-  "TARGET_SSSE3"
-  "pshufb\t{%2, %0|%0, %2}";
-  [(set_attr "type" "sselog1")
+(define_insn_and_split "ssse3_pshufbv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+   (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" 
"ym,x,Yv")]
+UNSPEC_PSHUFB))
+   (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pshufb\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 3)
+   (and:V4SI (match_dup 3) (match_dup 2)))
+   (set (match_dup 0)
+   (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
+{
+  /* Emulate MMX version of pshufb with SSE version by masking out the
+ bit 3 of the shuffle control byte.  */
+  operands[0] = lowpart_subreg (V16QImode, operands[0],
+   GET_MODE (operands[0]));
+  operands[1] = lowpart_subreg (V16QImode, operands[1],
+   GET_MODE (operands[1]));
+  operands[2] = lowpart_subreg (V4SImode, operands[2],
+   GET_MODE (operands[2]));
+  operands[4] = lowpart_subreg (V16QImode, operands[3],
+   GET_MODE (operands[3]));
+  rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7));
+  rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
+  operands[5] = force_const_mem (V4SImode, vec_const);
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_psign3"
   [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
-- 
2.20.1



Re: [PATCH 00/40] V6: Emulate MMX intrinsics with SSE

2019-02-15 Thread H.J. Lu
On Fri, Feb 15, 2019 at 9:50 AM Uros Bizjak  wrote:
>
> On Fri, Feb 15, 2019 at 2:58 PM H.J. Lu  wrote:
> >
> > On x86-64, since __m64 is returned and passed in XMM registers, we can
> > emulate MMX intrinsics with SSE instructions. To support it, we added
> >
> >  #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)
> >
> > ;; Define instruction set of MMX instructions
> > (define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
> >   (const_string "base"))
> >
> >  (eq_attr "mmx_isa" "native")
> >(symbol_ref "!TARGET_MMX_WITH_SSE")
> >  (eq_attr "mmx_isa" "x64")
> >(symbol_ref "TARGET_MMX_WITH_SSE")
> >  (eq_attr "mmx_isa" "x64_avx")
> >(symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
> >  (eq_attr "mmx_isa" "x64_noavx")
> >(symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
> >
> > We added SSE emulation to MMX patterns and disabled MMX alternatives with
> > TARGET_MMX_WITH_SSE.
> >
> > Most of MMX instructions have equivalent SSE versions and results of some
> > SSE versions need to be reshuffled to the right order for MMX.  There are
> > a couple of tricky cases:
> >
> > 1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
> > maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
> > mask operand and handle unmapped bits 64:127 at memory address by
> > adjusting source and mask operands together with memory address.
> >
> > 2. MMX movntq is emulated with SSE2 DImode movnti, which is available
> > in 64-bit mode.
> >
> > 3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
> > SSE emulation must clear bit 3 of each shuffle control byte.
> >
> > 4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve
> > the upper 64 bits of destination XMM register.
> >
> > Tests are also added to check each SSE emulation of MMX intrinsics.
> >
> > There are no regressions on i686 and x86-64.  For x86-64, GCC is also
> > tested with
> >
> > --with-arch=native --with-cpu=native
> >
> > on AVX2 and AVX512F machines.
>
> I went through the code again, and looks OK in general, modulo
> mmx_nonimmediate_operand issue and a couple of minor issues.
>
> Please substitute nonimmediate_operand predicate with
> mmx_nonimmediate_operand in expanders and insn patterns. Please note
> that the proposed convention is to name the operand
> register_mmxmem_operand (c.f. register_ssemem_operand), so I suggest
> we name the predicate in this way.
>
> There is an issue with a change to emms pattern.
>
> And let's remove _mm_empty () calls from testcases; they complicate
> things too much for no apparent benefit.
>
> With those issues fixed, the patchset is OK for gcc-10 when it opens.

The new patch set starts at

https://gcc.gnu.org/ml/gcc-patches/2019-02/msg01275.html

including

https://gcc.gnu.org/ml/gcc-patches/2019-02/msg01271.html

for

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89372

-- 
H.J.


[PATCH 40/42] i386: Allow MMX intrinsic emulation with SSE

2019-02-15 Thread H.J. Lu
Allow MMX intrinsic emulation with SSE/SSE2/SSSE3.  Don't enable MMX ISA
by default with TARGET_MMX_WITH_SSE.

For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit
mode since MMX intrinsics can be emulated with SSE.

gcc/

PR target/89021
* config/i386/i386-builtin.def: Enable MMX intrinsics with
SSE/SSE2/SSSE3.
* config/i386/i386.c (ix86_init_mmx_sse_builtins): Likewise.
(ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
intrinsics with TARGET_MMX_WITH_SSE.
* config/i386/mmintrin.h: Only require SSE2 if __MMX_WITH_SSE__
is defined.

gcc/testsuite/

PR target/89021
* gcc.target/i386/pr82483-1.c: Error only on ia32.
* gcc.target/i386/pr82483-2.c: Likewise.
---
 gcc/config/i386/i386-builtin.def  | 126 +++---
 gcc/config/i386/i386.c|  29 -
 gcc/config/i386/mmintrin.h|  12 ++-
 gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
 5 files changed, 101 insertions(+), 70 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 88005f4687f..10a9d631f29 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", 
IX86_BUILTIN_FNSTSW, UNKN
 BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, 
UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", 
IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, 
"__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* 3DNow! */
 BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", 
IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
@@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", 
IX86_BUILTIN_RORQI, UNKNO
 BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, 
UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", 
IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", 
IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", 
IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", 
IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", 
IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", 
IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, 
"__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, 
"__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, 
"__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, 
"__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, 
"__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, 
"__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, 
"__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, 
"__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", 
IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, 
"__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", 
IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, 
"__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", 
IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", 
IX86_BUILTIN_PXOR, UNKNOWN, (int) 

[PATCH 26/42] i386: Emulate MMX umulv1siv1di3 with SSE2

2019-02-15 Thread H.J. Lu
Emulate MMX umulv1siv1di3 with SSE2.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/mmx.md (sse2_umulv1siv1di3): Add SSE emulation
support.
(*sse2_umulv1siv1di3): Add SSE2 emulation.
---
 gcc/config/i386/mmx.md | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 993ad99a36e..9cf0251293a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -905,30 +905,36 @@
 (mult:V1DI
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 1 "nonimmediate_operand")
+ (match_operand:V2SI 1 "register_mmxmem_operand")
  (parallel [(const_int 0)])))
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 2 "nonimmediate_operand")
+ (match_operand:V2SI 2 "register_mmxmem_operand")
  (parallel [(const_int 0)])]
-  "TARGET_SSE2"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
 
 (define_insn "*sse2_umulv1siv1di3"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
+  [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
 (mult:V1DI
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SI 1 "register_mmxmem_operand" "%0,0,Yv")
  (parallel [(const_int 0)])))
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
  (parallel [(const_int 0)])]
-  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
-  "pmuludq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && TARGET_SSE2
+   && ix86_binary_operator_ok (MULT, V2SImode, operands)"
+  "@
+   pmuludq\t{%2, %0|%0, %2}
+   pmuludq\t{%2, %0|%0, %2}
+   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_v4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 22/42] i386: Emulate MMX mmx_uavgv8qi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_uavgv8qi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_uavgv8qi3): Also check TARGET_MMX
and TARGET_MMX_WITH_SSE.
(*mmx_uavgv8qi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index d78c6a31962..570153521a1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1678,50 +1678,55 @@
(plus:V8HI
  (plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "nonimmediate_operand"))
+ (match_operand:V8QI 1 "register_mmxmem_operand"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "nonimmediate_operand")))
+ (match_operand:V8QI 2 "register_mmxmem_operand")))
  (const_vector:V8HI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "TARGET_SSE || TARGET_3DNOW"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
 
 (define_insn "*mmx_uavgv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(truncate:V8QI
  (lshiftrt:V8HI
(plus:V8HI
  (plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))
  (const_vector:V8HI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "(TARGET_SSE || TARGET_3DNOW)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V8QImode, operands)"
 {
   /* These two instructions have the same operation, but their encoding
  is different.  Prefer the one that is de facto standard.  */
-  if (TARGET_SSE || TARGET_3DNOW_A)
+  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+return "vpavgb\t{%2, %1, %0|%0, %1, %2}";
+  else if (TARGET_SSE || TARGET_3DNOW_A)
 return "pavgb\t{%2, %0|%0, %2}";
   else
 return "pavgusb\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "mmxshft")
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
(set (attr "prefix_extra")
  (if_then_else
(not (ior (match_test "TARGET_SSE")
 (match_test "TARGET_3DNOW_A")))
(const_string "1")
(const_string "*")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_uavgv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 28/42] i386: Emulate MMX ssse3_phwv4hi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_phwv4hi3 with SSE by moving bits
64:95 to bits 32:63 in SSE register.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_phwv4hi3):
Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 06c9b5b58f1..38b83c57ffc 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15232,13 +15232,13 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
 
-(define_insn "ssse3_phwv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_phwv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_concat:V4HI
  (vec_concat:V2HI
(ssse3_plusminus:HI
  (vec_select:HI
-   (match_operand:V4HI 1 "register_operand" "0")
+   (match_operand:V4HI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0)]))
  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
@@ -15247,19 +15247,37 @@
  (vec_concat:V2HI
(ssse3_plusminus:HI
  (vec_select:HI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0)]))
  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))]
-  "TARGET_SSSE3"
-  "phw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   phw\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = lowpart_subreg (V8HImode, operands[0],
+   GET_MODE (operands[0]));
+  rtx op1 = lowpart_subreg (V8HImode, operands[1],
+   GET_MODE (operands[1]));
+  rtx op2 = lowpart_subreg (V8HImode, operands[2],
+   GET_MODE (operands[2]));
+  emit_insn (gen_ssse3_phwv8hi3 (op0, op1, op2));
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_phdv8si3"
   [(set (match_operand:V8SI 0 "register_operand" "=x")
-- 
2.20.1



[PATCH 27/42] i386: Make _mm_empty () as NOP when MMX is disabled

2019-02-15 Thread H.J. Lu
With SSE emulation of MMX intrinsics, we should make _mm_empty () a NOP
when MMX is disabled.

PR target/89021
* config/i386/mmx.md (mmx_): Renamed to ...
(mmx__1): This.
(mmx_): New expander.
---
 gcc/config/i386/mmx.md | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9cf0251293a..0f925c0b1ea 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1848,7 +1848,34 @@
   [(UNSPECV_EMMS "emms")
(UNSPECV_FEMMS "femms")])
 
-(define_insn "mmx_"
+(define_expand "mmx_"
+  [(unspec_volatile [(const_int 0)] EMMS)
+   (clobber (reg:XF ST0_REG))
+   (clobber (reg:XF ST1_REG))
+   (clobber (reg:XF ST2_REG))
+   (clobber (reg:XF ST3_REG))
+   (clobber (reg:XF ST4_REG))
+   (clobber (reg:XF ST5_REG))
+   (clobber (reg:XF ST6_REG))
+   (clobber (reg:XF ST7_REG))
+   (clobber (reg:DI MM0_REG))
+   (clobber (reg:DI MM1_REG))
+   (clobber (reg:DI MM2_REG))
+   (clobber (reg:DI MM3_REG))
+   (clobber (reg:DI MM4_REG))
+   (clobber (reg:DI MM5_REG))
+   (clobber (reg:DI MM6_REG))
+   (clobber (reg:DI MM7_REG))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+{
+   if (TARGET_MMX)
+ emit_insn (gen_mmx__1 ());
+   else
+ emit_insn (gen_nop ());
+   DONE;
+})
+
+(define_insn "mmx__1"
   [(unspec_volatile [(const_int 0)] EMMS)
(clobber (reg:XF ST0_REG))
(clobber (reg:XF ST1_REG))
-- 
2.20.1



[PATCH 12/42] i386: Emulate MMX vec_dupv2si with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX vec_dupv2si with SSE.  Add the "Yw" constraint to allow
broadcast from integer register for AVX512BW with TARGET_AVX512VL.
Only SSE register source operand is allowed.

PR target/89021
* config/i386/constraints.md (Yw): New constraint.
* config/i386/mmx.md (*vec_dupv2si): Changed to
define_insn_and_split and also allow TARGET_MMX_WITH_SSE to
support SSE emulation.
---
 gcc/config/i386/constraints.md |  6 ++
 gcc/config/i386/mmx.md | 24 +---
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 16075b4acf3..c546b20d9dc 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -110,6 +110,8 @@
 ;;  v  any EVEX encodable SSE register for AVX512VL target,
 ;; otherwise any SSE register
 ;;  h  EVEX encodable SSE register with number factor of four
+;;  w  any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
+;; target.
 
 (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
  "First SSE register (@code{%xmm0}).")
@@ -146,6 +148,10 @@
  "TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
  "@internal For AVX512VL, any EVEX encodable SSE register 
(@code{%xmm0-%xmm31}), otherwise any SSE register.")
 
+(define_register_constraint "Yw"
+ "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW 
with TARGET_AVX512VL target.")
+
 ;; We use the B prefix to denote any number of internal operands:
 ;;  f  FLAGS_REG
 ;;  g  GOT memory operand.
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b0c6a8c8077..d568a534956 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1381,14 +1381,24 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "DI")])
 
-(define_insn "*vec_dupv2si"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2si"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw")
(vec_duplicate:V2SI
- (match_operand:SI 1 "register_operand" "0")))]
-  "TARGET_MMX"
-  "punpckldq\t%0, %0"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+ (match_operand:SI 1 "register_operand" "0,0,Yv,r")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t%0, %0
+   #
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+   (vec_duplicate:V4SI (match_dup 1)))]
+  "operands[0] = lowpart_subreg (V4SImode, operands[0],
+GET_MODE (operands[0]));"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx,x64_avx")
+   (set_attr "type" "mmxcvt,ssemov,ssemov,ssemov")
+   (set_attr "mode" "DI,TI,TI,TI")])
 
 (define_insn "*mmx_concatv2si"
   [(set (match_operand:V2SI 0 "register_operand" "=y,y")
-- 
2.20.1



[PATCH 37/42] Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE

2019-02-15 Thread H.J. Lu
From: Uros Bizjak 

2019-02-14  Uroš Bizjak  

PR target/89021
* config/i386/i386.md (*zero_extendsidi2): Add mmx_isa attribute.
* config/i386/sse.md (*vec_concatv2sf_sse4_1): Ditto.
(*vec_concatv2sf_sse): Ditto.
(*vec_concatv2si_sse4_1): Ditto.
(*vec_concatv2si): Ditto.
(*vec_concatv4si_0): Ditto.
(*vec_concatv2di_0): Ditto.
---
 gcc/config/i386/i386.md |  4 
 gcc/config/i386/sse.md  | 16 ++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e1727676deb..22172fd77a8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3682,6 +3682,10 @@
  (const_string "avx512bw")
   ]
   (const_string "*")))
+   (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "5,6")
+  (const_string "native")
+  (const_string "*")))
(set (attr "type")
  (cond [(eq_attr "alternative" "0,1,2,4")
  (const_string "multi")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 379da16615d..b6196b088fd 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -7201,6 +7201,10 @@
  (const_string "mmxmov")
   ]
   (const_string "sselog")))
+   (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "7,8")
+  (const_string "native")
+  (const_string "*")))
(set (attr "prefix_data16")
  (if_then_else (eq_attr "alternative" "3,4")
   (const_string "1")
@@ -7236,7 +7240,8 @@
movss\t{%1, %0|%0, %1}
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+  [(set_attr "mmx_isa" "*,*,native,native")
+   (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "V4SF,SF,DI,DI")])
 
 (define_insn "*vec_concatv4sf"
@@ -14509,6 +14514,10 @@
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
   [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
+   (set (attr "mmx_isa")
+ (if_then_else (eq_attr "alternative" "8,9")
+  (const_string "native")
+  (const_string "*")))
(set (attr "type")
  (cond [(eq_attr "alternative" "7")
  (const_string "ssemov")
@@ -14546,6 +14555,7 @@
punpckldq\t{%2, %0|%0, %2}
movd\t{%1, %0|%0, %1}"
   [(set_attr "isa" "sse2,sse2,*,*,*,*")
+   (set_attr "mmx_isa" "*,*,*,*,native,native")
(set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
(set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
 
@@ -14575,7 +14585,8 @@
   "@
%vmovq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
+  [(set_attr "mmx_isa" "*,native")
+   (set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex,orig")
(set_attr "mode" "TI")])
 
@@ -14650,6 +14661,7 @@
%vmovq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
   [(set_attr "isa" "x64,*,*")
+   (set_attr "mmx_isa" "*,*,native")
(set_attr "type" "ssemov")
(set_attr "prefix_rex" "1,*,*")
(set_attr "prefix" "maybe_vex,maybe_vex,orig")
-- 
2.20.1



[PATCH 39/42] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE

2019-02-15 Thread H.J. Lu
PR target/89021
* config/i386/i386.c (ix86_expand_vector_init_duplicate): Set
mmx_ok to true if TARGET_MMX_WITH_SSE is true.
(ix86_expand_vector_init_one_nonzero): Likewise.
(ix86_expand_vector_init_one_var): Likewise.
(ix86_expand_vector_init_general): Likewise.
(ix86_expand_vector_init): Likewise.
(ix86_expand_vector_set): Likewise.
(ix86_expand_vector_extract): Likewise.
* config/i386/mmx.md (*vec_dupv2sf): Changed to
define_insn_and_split to support SSE emulation.
(*vec_extractv2sf_0): Likewise.
(*vec_extractv2sf_1): Likewise.
(*vec_extractv2si_0): Likewise.
(*vec_extractv2si_1): Likewise.
(*vec_extractv2si_zext_mem): Likewise.
(vec_setv2sf): Also allow TARGET_MMX_WITH_SSE.
(vec_extractv2sf_1 splitter): Likewise.
(vec_extractv2sfsf): Likewise.
(vec_setv2si): Likewise.
(vec_extractv2si_1 splitter): Likewise.
(vec_extractv2sisi): Likewise.
(vec_setv4hi): Likewise.
(vec_extractv4hihi): Likewise.
(vec_setv8qi): Likewise.
(vec_extractv8qiqi): Likewise.
---
 gcc/config/i386/i386.c |  8 +
 gcc/config/i386/mmx.md | 69 +++---
 2 files changed, 52 insertions(+), 25 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a76c17beece..25e0dc43a9e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42620,6 +42620,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, 
machine_mode mode,
 {
   bool ok;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SImode:
@@ -42779,6 +42780,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, 
machine_mode mode,
   bool use_vector_set = false;
   rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2DImode:
@@ -42972,6 +42974,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok, 
machine_mode mode,
   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2DFmode:
@@ -43357,6 +43360,7 @@ ix86_expand_vector_init_general (bool mmx_ok, 
machine_mode mode,
   machine_mode quarter_mode = VOIDmode;
   int n, i;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SFmode:
@@ -43556,6 +43560,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx 
vals)
   int i;
   rtx x;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
+
   /* Handle first initialization from vector elts.  */
   if (n_elts != XVECLEN (vals, 0))
 {
@@ -43655,6 +43661,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx 
val, int elt)
   machine_mode mmode = VOIDmode;
   rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SFmode:
@@ -44010,6 +44017,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, 
rtx vec, int elt)
   bool use_vec_extr = false;
   rtx tmp;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SImode:
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index a21e11c8dfb..fa0b0126e91 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -555,14 +555,23 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
 
-(define_insn "*vec_dupv2sf"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2sf"
+  [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
(vec_duplicate:V2SF
- (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_MMX"
-  "punpckldq\t%0, %0"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+ (match_operand:SF 1 "register_operand" "0,0,Yv")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t%0, %0
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+   (vec_duplicate:V4SF (match_dup 1)))]
+  "operands[0] = lowpart_subreg (V4SFmode, operands[0],
+GET_MODE (operands[0]));"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,ssemov,ssemov")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "*mmx_concatv2sf"
   [(set (match_operand:V2SF 0 "register_operand" "=y,y")
@@ -580,7 +589,7 @@
   [(match_operand:V2SF 0 "register_operand")
(match_operand:SF 1 "register_operand")
(match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_set (false, operands[0], operands[1],
  INTVAL (operands[2]));
@@ -594,11 +603,13 @@
(vec_select:SF
  (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
  (parallel [(const_int 0)])))]
-  "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && 

[PATCH 41/42] i386: Enable TM MMX intrinsics with SSE2

2019-02-15 Thread H.J. Lu
This patch enables TM MMX intrinsics with SSE2 when MMX is disabled.

PR target/89021
* config/i386/i386.c (bdesc_tm): Enable MMX intrinsics with
SSE2.
---
 gcc/config/i386/i386.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 073a2534d1f..319a98f824a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -31065,13 +31065,13 @@ static const struct builtin_description 
bdesc_##kind[] =  \
we're lazy.  Add casts to make them fit.  */
 static const struct builtin_description bdesc_tm[] =
 {
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, 
VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, 
UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, 
UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, 
V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
 
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum 
ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
@@ -31089,7 +31089,7 @@ static const struct builtin_description bdesc_tm[] =
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
 
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, 
VOID_FTYPE_PCVOID },
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
 };
-- 
2.20.1



[PATCH 21/42] i386: Emulate MMX maskmovq with SSE2 maskmovdqu

2019-02-15 Thread H.J. Lu
Emulate MMX maskmovq with SSE2 maskmovdqu for TARGET_MMX_WITH_SSE by
zero-extending source and mask operands to 128 bits.  Handle unmapped
bits 64:127 at memory address by adjusting source and mask operands
together with memory address.

PR target/89021
* config/i386/xmmintrin.h: Emulate MMX maskmovq with SSE2
maskmovdqu for __MMX_WITH_SSE__.
---
 gcc/config/i386/xmmintrin.h | 61 +
 1 file changed, 61 insertions(+)

diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 58284378514..a915f6c87d7 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1165,7 +1165,68 @@ _m_pshufw (__m64 __A, int const __N)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
 {
+#ifdef __MMX_WITH_SSE__
+  /* Emulate MMX maskmovq with SSE2 maskmovdqu and handle unmapped bits
+ 64:127 at address __P.  */
+  typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+  typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+  /* Zero-extend __A and __N to 128 bits.  */
+  __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 };
+  __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 };
+
+  /* Check the alignment of __P.  */
+  __SIZE_TYPE__ offset = ((__SIZE_TYPE__) __P) & 0xf;
+  if (offset)
+{
+  /* If the misalignment of __P > 8, subtract __P by 8 bytes.
+Otherwise, subtract __P by the misalignment.  */
+  if (offset > 8)
+   offset = 8;
+  __P = (char *) (((__SIZE_TYPE__) __P) - offset);
+
+  /* Shift __A128 and __N128 to the left by the adjustment.  */
+  switch (offset)
+   {
+   case 1:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8);
+ break;
+   case 2:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8);
+ break;
+   case 3:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8);
+ break;
+   case 4:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8);
+ break;
+   case 5:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8);
+ break;
+   case 6:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8);
+ break;
+   case 7:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8);
+ break;
+   case 8:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8);
+ break;
+   default:
+ break;
+   }
+}
+  __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P);
+#else
   __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+#endif
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-- 
2.20.1



[PATCH 25/42] i386: Emulate MMX movntq with SSE2 movntidi

2019-02-15 Thread H.J. Lu
Emulate MMX movntq with SSE2 movntidi.  Only register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (sse_movntq): Add SSE2 emulation.
---
 gcc/config/i386/mmx.md | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index bcce7c06c4f..993ad99a36e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -214,12 +214,16 @@
 })
 
 (define_insn "sse_movntq"
-  [(set (match_operand:DI 0 "memory_operand" "=m")
-   (unspec:DI [(match_operand:DI 1 "register_operand" "y")]
+  [(set (match_operand:DI 0 "memory_operand" "=m,m")
+   (unspec:DI [(match_operand:DI 1 "register_operand" "y,r")]
   UNSPEC_MOVNTQ))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "movntq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxmov")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   movntq\t{%1, %0|%0, %1}
+   movnti\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxmov,ssemov")
(set_attr "mode" "DI")])
 
 ;
-- 
2.20.1



[PATCH 35/42] i386: Emulate MMX abs<mode>2 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX abs<mode>2 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/sse.md (abs<mode>2): Add SSE emulation.
---
 gcc/config/i386/sse.md | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ec68b5dc2ce..92f5ad17156 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15942,16 +15942,19 @@
 })
 
 (define_insn "abs2"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
(abs:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
-  "TARGET_SSSE3"
-  "pabs\t{%1, %0|%0, %1}";
-  [(set_attr "type" "sselog1")
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pabs\t{%1, %0|%0, %1}
+   %vpabs\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "sselog1")
(set_attr "prefix_rep" "0")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 24/42] i386: Emulate MMX mmx_psadbw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_psadbw with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_psadbw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b8983e1755a..bcce7c06c4f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1770,14 +1770,19 @@
(set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
-(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+  [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
+(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" 
"ym,x,Yv")]
 UNSPEC_PSADBW))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "psadbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   psadbw\t{%2, %0|%0, %2}
+   psadbw\t{%2, %0|%0, %2}
+   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn_and_split "mmx_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-- 
2.20.1



[PATCH 31/42] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_pmulhrswv4hi3 with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/sse.md (*ssse3_pmulhrswv4hi3): Add SSE emulation.
---
 gcc/config/i386/sse.md | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index dc07173cb1c..80b1a46f507 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15652,25 +15652,31 @@
(set_attr "mode" "")])
 
 (define_insn "*ssse3_pmulhrswv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(plus:V4SI
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
(const_int 14))
  (match_operand:V4HI 3 "const1_operand"))
(const_int 1]
-  "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "pmulhrsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && TARGET_SSSE3
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   pmulhrsw\t{%2, %0|%0, %2}
+   pmulhrsw\t{%2, %0|%0, %2}
+   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_pshufb3"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
-- 
2.20.1



[PATCH 38/42] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE

2019-02-15 Thread H.J. Lu
PR target/89021
* config/i386/mmx.md (MMXMODE:mov<mode>): Also allow
TARGET_MMX_WITH_SSE.
(MMXMODE:*mov<mode>_internal): Likewise.
(MMXMODE:movmisalign<mode>): Likewise.
---
 gcc/config/i386/mmx.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 0f925c0b1ea..a21e11c8dfb 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -70,7 +70,7 @@
 (define_expand "mov"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (mode, operands);
   DONE;
@@ -81,7 +81,7 @@
 "=r ,o ,r,r ,m ,?!y,!y,?!y,m  ,r  ,?!y,v,v,v,m,r,v,!y,*x")
(match_operand:MMXMODE 1 "nonimm_or_0_operand"
 "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!y,r  ,C,v,m,v,v,r,*x,!y"))]
-  "TARGET_MMX
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -207,7 +207,7 @@
 (define_expand "movmisalign"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (mode, operands);
   DONE;
-- 
2.20.1



[PATCH 36/42] i386: Correct <ssse3_avx2>_pmulhrsw<mode>3[_mask]

2019-02-15 Thread H.J. Lu
There is no V4HI pmulhrsw in AVX512BW and V4HI/V8HI pmulhrsw don't require
AVX2.  To support TARGET_MMX_WITH_SSE, replace nonimmediate_operand with
register_pmulhrswmem_operand in <ssse3_avx2>_pmulhrsw<mode>3.

PR target/89372
* config/i386/predicates.md (register_pmulhrswmem_operand): New.
* config/i386/sse.md (PMULHRSW): Remove V4HI.
(PMULHRSW_MMX): New.
(<ssse3_avx2>_pmulhrsw<mode>3): Replace PMULHRSW with
PMULHRSW_MMX.  Require TARGET_SSSE3, not TARGET_AVX2.  Replace
nonimmediate_operand with register_pmulhrswmem_operand.
---
 gcc/config/i386/predicates.md |  7 +++
 gcc/config/i386/sse.md| 15 +--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index f3c2f72de54..b7cb26a81fe 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -56,6 +56,13 @@
(and (not (match_test "TARGET_MMX_WITH_SSE"))
(match_operand 0 "memory_operand"
 
+;; Match register operands, but include memory operands for
+;; !(TARGET_MMX_WITH_SSE && mode == V4HImode).
+(define_predicate "register_pmulhrswmem_operand"
+  (ior (match_operand 0 "register_operand")
+   (and (not (match_test "TARGET_MMX_WITH_SSE && mode == V4HImode"))
+   (match_operand 0 "memory_operand"
+
 ;; True if the operand is an SSE register.
 (define_predicate "sse_reg_operand"
   (and (match_code "reg")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 92f5ad17156..379da16615d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15579,7 +15579,7 @@
(set_attr "mode" "DI,TI,TI")])
 
 (define_mode_iterator PMULHRSW
-  [V4HI V8HI (V16HI "TARGET_AVX2")])
+  [V8HI (V16HI "TARGET_AVX2")])
 
 (define_expand "_pmulhrsw3_mask"
   [(set (match_operand:PMULHRSW 0 "register_operand")
@@ -15604,21 +15604,24 @@
   ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
 })
 
+(define_mode_iterator PMULHRSW_MMX
+  [V4HI V8HI (V16HI "TARGET_AVX2")])
+
 (define_expand "_pmulhrsw3"
-  [(set (match_operand:PMULHRSW 0 "register_operand")
-   (truncate:PMULHRSW
+  [(set (match_operand:PMULHRSW_MMX 0 "register_operand")
+   (truncate:PMULHRSW_MMX
  (lshiftrt:
(plus:
  (lshiftrt:
(mult:
  (sign_extend:
-   (match_operand:PMULHRSW 1 "nonimmediate_operand"))
+   (match_operand:PMULHRSW_MMX 1 
"register_pmulhrswmem_operand"))
  (sign_extend:
-   (match_operand:PMULHRSW 2 "nonimmediate_operand")))
+   (match_operand:PMULHRSW_MMX 2 
"register_pmulhrswmem_operand")))
(const_int 14))
  (match_dup 3))
(const_int 1]
-  "TARGET_AVX2"
+  "TARGET_SSSE3"
 {
   operands[3] = CONST1_RTX(mode);
   ix86_fixup_binary_operands_no_copy (MULT, mode, operands);
-- 
2.20.1



[PATCH 30/42] i386: Emulate MMX ssse3_pmaddubsw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_pmaddubsw with SSE.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_pmaddubsw): Add SSE emulation.
---
 gcc/config/i386/sse.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0565ddc177f..dc07173cb1c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15544,17 +15544,17 @@
(set_attr "mode" "TI")])
 
 (define_insn "ssse3_pmaddubsw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(ss_plus:V4HI
  (mult:V4HI
(zero_extend:V4HI
  (vec_select:V4QI
-   (match_operand:V8QI 1 "register_operand" "0")
+   (match_operand:V8QI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0) (const_int 2)
   (const_int 4) (const_int 6)])))
(sign_extend:V4HI
  (vec_select:V4QI
-   (match_operand:V8QI 2 "nonimmediate_operand" "ym")
+   (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0) (const_int 2)
   (const_int 4) (const_int 6)]
  (mult:V4HI
@@ -15566,13 +15566,17 @@
  (vec_select:V4QI (match_dup 2)
(parallel [(const_int 1) (const_int 3)
   (const_int 5) (const_int 7)]))]
-  "TARGET_SSSE3"
-  "pmaddubsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pmaddubsw\t{%2, %0|%0, %2}
+   pmaddubsw\t{%2, %0|%0, %2}
+   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_mode_iterator PMULHRSW
   [V4HI V8HI (V16HI "TARGET_AVX2")])
-- 
2.20.1



[PATCH 29/42] i386: Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_ph<plusminus_mnemonic>dv2si3 with SSE by moving bits
64:95 to bits 32:63 in SSE register.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_ph<plusminus_mnemonic>dv2si3):
Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 38b83c57ffc..0565ddc177f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15356,26 +15356,44 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
 
-(define_insn "ssse3_phdv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_phdv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_concat:V2SI
  (plusminus:SI
(vec_select:SI
- (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
  (parallel [(const_int 0)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
  (plusminus:SI
(vec_select:SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
  (parallel [(const_int 0)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 1)])]
-  "TARGET_SSSE3"
-  "phd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   phd\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = lowpart_subreg (V4SImode, operands[0],
+   GET_MODE (operands[0]));
+  rtx op1 = lowpart_subreg (V4SImode, operands[1],
+   GET_MODE (operands[1]));
+  rtx op2 = lowpart_subreg (V4SImode, operands[2],
+   GET_MODE (operands[2]));
+  emit_insn (gen_ssse3_phdv4si3 (op0, op1, op2));
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_pmaddubsw256"
   [(set (match_operand:V16HI 0 "register_operand" "=x,v")
-- 
2.20.1



[PATCH 33/42] i386: Emulate MMX ssse3_psign<mode>3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_psign<mode>3 with SSE.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_psign<mode>3): Add SSE emulation.
---
 gcc/config/i386/sse.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 704e211c0b8..c2dbd59049a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15755,17 +15755,21 @@
(set_attr "mode" "")])
 
 (define_insn "ssse3_psign3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(unspec:MMXMODEI
- [(match_operand:MMXMODEI 1 "register_operand" "0")
-  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+ [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
+  (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
  UNSPEC_PSIGN))]
-  "TARGET_SSSE3"
-  "psign\t{%2, %0|%0, %2}";
-  [(set_attr "type" "sselog1")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   psign\t{%2, %0|%0, %2}
+   psign\t{%2, %0|%0, %2}
+   vpsign\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_palignr_mask"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
-- 
2.20.1



[PATCH 34/42] i386: Emulate MMX ssse3_palignrdi with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX version of palignrq with SSE version by concatenating 2
64-bit MMX operands into a single 128-bit SSE operand, followed by
SSE psrldq.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/sse.md (ssse3_palignrdi): Changed to
define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 58 ++
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c2dbd59049a..ec68b5dc2ce 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15824,23 +15824,61 @@
(set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "")])
 
-(define_insn "ssse3_palignrdi"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-   (unspec:DI [(match_operand:DI 1 "register_operand" "0")
-   (match_operand:DI 2 "nonimmediate_operand" "ym")
-   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+(define_insn_and_split "ssse3_palignrdi"
+  [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
+   (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
+   (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
+   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
   UNSPEC_PALIGNR))]
-  "TARGET_SSSE3"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
 {
-  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
-  return "palignr\t{%3, %2, %0|%0, %2, %3}";
+  switch (which_alternative)
+{
+case 0:
+  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+  return "palignr\t{%3, %2, %0|%0, %2, %3}";
+case 1:
+case 2:
+  return "#";
+default:
+  gcc_unreachable ();
+}
 }
-  [(set_attr "type" "sseishft")
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+   (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
+{
+  /* Emulate MMX palignrdi with SSE psrldq.  */
+  rtx op0 = lowpart_subreg (V2DImode, operands[0],
+   GET_MODE (operands[0]));
+  rtx insn;
+  if (TARGET_AVX)
+insn = gen_vec_concatv2di (op0, operands[2], operands[1]);
+  else
+{
+  /* NB: SSE can only concatenate OP0 and OP1 to OP0.  */
+  insn = gen_vec_concatv2di (op0, operands[1], operands[2]);
+  emit_insn (insn);
+  /* Swap bits 0:63 with bits 64:127.  */
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2),
+ GEN_INT (3),
+ GEN_INT (0),
+ GEN_INT (1)));
+  rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
+  rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
+  insn = gen_rtx_SET (op1, op2);
+}
+  emit_insn (insn);
+  operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
 ;; modes for abs instruction on pre AVX-512 targets.
-- 
2.20.1



[PATCH 19/42] i386: Emulate MMX mmx_pmovmskb with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_pmovmskb with SSE by zero-extending result of SSE pmovmskb
from QImode to SImode.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_pmovmskb): Changed to
define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/mmx.md | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 058791e01e6..9c552f929f1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1762,14 +1762,30 @@
   [(set_attr "type" "mmxshft")
(set_attr "mode" "DI")])
 
-(define_insn "mmx_pmovmskb"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-   (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+(define_insn_and_split "mmx_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+   (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")]
   UNSPEC_MOVMSK))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmovmskb\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   pmovmskb\t{%1, %0|%0, %1}
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
+   (set (match_dup 0)
+   (zero_extend:SI (match_dup 2)))]
+{
+  /* Generate SSE pmovmskb and zero-extend from QImode to SImode.  */
+  operands[1] = lowpart_subreg (V16QImode, operands[1],
+   GET_MODE (operands[1]));
+  operands[2] = lowpart_subreg (QImode, operands[0],
+   GET_MODE (operands[0]));
+}
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,ssemov")
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_maskmovq"
   [(set (match_operand:V8QI 0 "memory_operand")
-- 
2.20.1



[PATCH 15/42] i386: Emulate MMX sse_cvtpi2ps with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX sse_cvtpi2ps with SSE2 cvtdq2ps, preserving upper 64 bits of
destination XMM register.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/sse.md (sse_cvtpi2ps): Changed to
define_insn_and_split.  Also allow TARGET_MMX_WITH_SSE.  Add
SSE emulation.
---
 gcc/config/i386/sse.md | 64 --
 1 file changed, 56 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 70e3669d115..06c9b5b58f1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4561,16 +4561,64 @@
 ;;
 ;
 
-(define_insn "sse_cvtpi2ps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn_and_split "sse_cvtpi2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
(vec_merge:V4SF
  (vec_duplicate:V4SF
-   (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
- (match_operand:V4SF 1 "register_operand" "0")
- (const_int 3)))]
-  "TARGET_SSE"
-  "cvtpi2ps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
+   (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" 
"ym,x,Yv")))
+ (match_operand:V4SF 1 "register_operand" "0,0,Yv")
+ (const_int 3)))
+   (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
+  "TARGET_SSE || TARGET_MMX_WITH_SSE"
+  "@
+   cvtpi2ps\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  rtx op2 = lowpart_subreg (V4SImode, operands[2],
+   GET_MODE (operands[2]));
+  /* Generate SSE2 cvtdq2ps.  */
+  rtx insn = gen_floatv4siv4sf2 (operands[3], op2);
+  emit_insn (insn);
+
+  /* Merge operands[3] with operands[0].  */
+  rtx mask, op1;
+  if (TARGET_AVX)
+{
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (0), GEN_INT (1),
+ GEN_INT (6), GEN_INT (7)));
+  op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
+  op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+  insn = gen_rtx_SET (operands[0], op2);
+}
+  else
+{
+  /* NB: SSE can only concatenate OP0 and OP3 to OP0.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (4), GEN_INT (5)));
+  op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
+  op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+  insn = gen_rtx_SET (operands[0], op2);
+  emit_insn (insn);
+
+  /* Swap bits 0:63 with bits 64:127.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (1)));
+  rtx dest = lowpart_subreg (V4SImode, operands[0],
+GET_MODE (operands[0]));
+  op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  insn = gen_rtx_SET (dest, op1);
+}
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
 
 (define_insn "sse_cvtps2pi"
-- 
2.20.1



[PATCH 17/42] i386: Emulate MMX mmx_pinsrw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_pinsrw with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_pinsrw): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(*mmx_pinsrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 33 +++--
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 22547c7da6f..1e68d1bb338 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1282,32 +1282,45 @@
 (match_operand:SI 2 "nonimmediate_operand"))
  (match_operand:V4HI 1 "register_operand")
   (match_operand:SI 3 "const_0_to_3_operand")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
 {
   operands[2] = gen_lowpart (HImode, operands[2]);
   operands[3] = GEN_INT (1 << INTVAL (operands[3]));
 })
 
 (define_insn "*mmx_pinsrw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
 (vec_merge:V4HI
   (vec_duplicate:V4HI
-(match_operand:HI 2 "nonimmediate_operand" "rm"))
- (match_operand:V4HI 1 "register_operand" "0")
+(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yv")
   (match_operand:SI 3 "const_int_operand")))]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
< GET_MODE_NUNITS (V4HImode))"
 {
   operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
-  if (MEM_P (operands[2]))
-return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+{
+  if (MEM_P (operands[2]))
+   return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+  else
+   return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+}
   else
-return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+{
+  if (MEM_P (operands[2]))
+   return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+  else
+   return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
 }
-  [(set_attr "type" "mmxcvt")
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_pextrw"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-- 
2.20.1



[PATCH 20/42] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_umulv4hi3_highpart with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_umulv4hi3_highpart): Also check
TARGET_MMX and TARGET_MMX_WITH_SSE.
(*mmx_umulv4hi3_highpart): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9c552f929f1..d78c6a31962 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -781,28 +781,34 @@
  (lshiftrt:V4SI
(mult:V4SI
  (zero_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand"))
+   (match_operand:V4HI 1 "register_mmxmem_operand"))
  (zero_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand")))
+   (match_operand:V4HI 2 "register_mmxmem_operand")))
(const_int 16]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_umulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(mult:V4SI
  (zero_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
  (zero_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
  (const_int 16]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmulhuw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "@
+   pmulhuw\t{%2, %0|%0, %2}
+   pmulhuw\t{%2, %0|%0, %2}
+   vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pmaddwd"
   [(set (match_operand:V2SI 0 "register_operand")
-- 
2.20.1



[PATCH 13/42] i386: Emulate MMX pshufw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX pshufw with SSE.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_pshufw): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(mmx_pshufw_1): Add SSE emulation.
(*vec_dupv4hi): Changed to define_insn_and_split and also allow
TARGET_MMX_WITH_SSE to support SSE emulation.
---
 gcc/config/i386/mmx.md | 81 +-
 1 file changed, 65 insertions(+), 16 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index d568a534956..43f85064cd9 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1323,9 +1323,10 @@
 
 (define_expand "mmx_pshufw"
   [(match_operand:V4HI 0 "register_operand")
-   (match_operand:V4HI 1 "nonimmediate_operand")
+   (match_operand:V4HI 1 "register_mmxmem_operand")
(match_operand:SI 2 "const_int_operand")]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
 {
   int mask = INTVAL (operands[2]);
   emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
@@ -1337,14 +1338,15 @@
 })
 
 (define_insn "mmx_pshufw_1"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yv")
 (vec_select:V4HI
-  (match_operand:V4HI 1 "nonimmediate_operand" "ym")
+  (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yv")
   (parallel [(match_operand 2 "const_0_to_3_operand")
  (match_operand 3 "const_0_to_3_operand")
  (match_operand 4 "const_0_to_3_operand")
  (match_operand 5 "const_0_to_3_operand")])))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
 {
   int mask = 0;
   mask |= INTVAL (operands[2]) << 0;
@@ -1353,11 +1355,20 @@
   mask |= INTVAL (operands[5]) << 6;
   operands[2] = GEN_INT (mask);
 
-  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+  switch (which_alternative)
+{
+case 0:
+  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+case 1:
+  return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+default:
+  gcc_unreachable ();
+}
 }
-  [(set_attr "type" "mmxcvt")
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_insn "mmx_pswapdv2si2"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -1370,16 +1381,54 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
 
-(define_insn "*vec_dupv4hi"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv4hi"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yv,Yw")
(vec_duplicate:V4HI
  (truncate:HI
-   (match_operand:SI 1 "register_operand" "0"]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pshufw\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (match_operand:SI 1 "register_operand" "0,Yv,r"]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   pshufw\t{$0, %0, %0|%0, %0, 0}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  rtx op;
+  operands[0] = lowpart_subreg (V8HImode, operands[0],
+   GET_MODE (operands[0]));
+  if (TARGET_AVX2)
+{
+  operands[1] = lowpart_subreg (HImode, operands[1],
+   GET_MODE (operands[1]));
+  op = gen_rtx_VEC_DUPLICATE (V8HImode, operands[1]);
+}
+  else
+{
+  operands[1] = lowpart_subreg (V8HImode, operands[1],
+   GET_MODE (operands[1]));
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (8,
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (0),
+ GEN_INT (4),
+ GEN_INT (5),
+ GEN_INT (6),
+ GEN_INT (7)));
+
+  op = gen_rtx_VEC_SELECT (V8HImode, operands[1], mask);
+}
+  rtx insn = gen_rtx_SET (operands[0], op);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64,x64_avx")
+   (set_attr "type" "mmxcvt,sselog1,ssemov")
+   (set_attr "length_immediate" "1,1,0")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn_and_split "*vec_dupv2si"
   [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv,Yw")
-- 
2.20.1



[PATCH 14/42] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE.

PR target/89021
* config/i386/sse.md (sse_cvtps2pi): Add SSE emulation.
(sse_cvttps2pi): Likewise.
---
 gcc/config/i386/sse.md | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c8e0133560a..70e3669d115 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4574,26 +4574,32 @@
(set_attr "mode" "V4SF")])
 
 (define_insn "sse_cvtps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" 
"xm,YvBm")]
   UNSPEC_FIX_NOTRUNC)
  (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvtps2pi\t{%1, %0|%0, %q1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
+  "@
+   cvtps2pi\t{%1, %0|%0, %q1}
+   %vcvtps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx,*")
(set_attr "mode" "DI")])
 
 (define_insn "sse_cvttps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
(vec_select:V2SI
- (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm"))
  (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvttps2pi\t{%1, %0|%0, %q1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
+  "@
+   cvttps2pi\t{%1, %0|%0, %q1}
+   %vcvttps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx,*")
(set_attr "prefix_rep" "0")
(set_attr "mode" "SF")])
 
-- 
2.20.1



[PATCH 23/42] i386: Emulate MMX mmx_uavgv4hi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_uavgv4hi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_uavgv4hi3): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(*mmx_uavgv4hi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 570153521a1..b8983e1755a 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1735,33 +1735,39 @@
(plus:V4SI
  (plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand"))
+ (match_operand:V4HI 1 "register_mmxmem_operand"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand")))
+ (match_operand:V4HI 2 "register_mmxmem_operand")))
  (const_vector:V4SI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
 
 (define_insn "*mmx_uavgv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(plus:V4SI
  (plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
  (const_vector:V4SI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V4HImode, operands)"
-  "pavgw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "@
+   pavgw\t{%2, %0|%0, %2}
+   pavgw\t{%2, %0|%0, %2}
+   vpavgw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
   [(set (match_operand:V1DI 0 "register_operand" "=y")
-- 
2.20.1



[PATCH 05/42] i386: Emulate MMX mulv4hi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mulv4hi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_mulv4hi3): Also allow
TARGET_MMX_WITH_SSE.
(mulv4hi3): New.
(*mmx_mulv4hi3): Also allow TARGET_MMX_WITH_SSE.  Add SSE
support.
---
 gcc/config/i386/mmx.md | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 517c3283963..cdb0f698001 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -716,19 +716,31 @@
 
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-(mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand")
-  (match_operand:V4HI 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+(mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
+  (match_operand:V4HI 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_expand "mulv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+(mult:V4HI (match_operand:V4HI 1 "register_operand")
+  (match_operand:V4HI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_mulv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-(mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
-  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmullw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
+(mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
+  (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmullw\t{%2, %0|%0, %2}
+   pmullw\t{%2, %0|%0, %2}
+   vpmullw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_smulv4hi3_highpart"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 06/42] i386: Emulate MMX smulv4hi3_highpart with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX smulv4hi3_highpart with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_smulv4hi3_highpart): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_smulv4hi3_highpart): Also allow TARGET_MMX_WITH_SSE. Add
SSE support.
---
 gcc/config/i386/mmx.md | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index cdb0f698001..3a7964d52bb 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -748,27 +748,32 @@
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand"))
+   (match_operand:V4HI 1 "register_mmxmem_operand"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand")))
+   (match_operand:V4HI 2 "register_mmxmem_operand")))
(const_int 16]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_smulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
(const_int 16]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmulhw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmulhw\t{%2, %0|%0, %2}
+   pmulhw\t{%2, %0|%0, %2}
+   vpmulhw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_umulv4hi3_highpart"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 18/42] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_v4hi3): Also check TARGET_MMX
and TARGET_MMX_WITH_SSE.
(mmx_v8qi3): Likewise.
(smaxmin:v4hi3): New.
(umaxmin:v8qi3): Likewise.
(smaxmin:*mmx_v4hi3): Add SSE emulation.
(umaxmin:*mmx_v8qi3): Likewise.
---
 gcc/config/i386/mmx.md | 68 +-
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 1e68d1bb338..058791e01e6 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -923,40 +923,68 @@
 (define_expand "mmx_v4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
 (smaxmin:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand")
- (match_operand:V4HI 2 "nonimmediate_operand")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+ (match_operand:V4HI 1 "register_mmxmem_operand")
+ (match_operand:V4HI 2 "register_mmxmem_operand")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "ix86_fixup_binary_operands_no_copy (, V4HImode, operands);")
+
+(define_expand "v4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+(smaxmin:V4HI
+ (match_operand:V4HI 1 "register_operand")
+ (match_operand:V4HI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, V4HImode, operands);")
 
 (define_insn "*mmx_v4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
 (smaxmin:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+ (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (, V4HImode, operands)"
-  "pw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   pw\t{%2, %0|%0, %2}
+   pw\t{%2, %0|%0, %2}
+   vpw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_v8qi3"
   [(set (match_operand:V8QI 0 "register_operand")
 (umaxmin:V8QI
- (match_operand:V8QI 1 "nonimmediate_operand")
- (match_operand:V8QI 2 "nonimmediate_operand")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+ (match_operand:V8QI 1 "register_mmxmem_operand")
+ (match_operand:V8QI 2 "register_mmxmem_operand")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "ix86_fixup_binary_operands_no_copy (, V8QImode, operands);")
+
+(define_expand "v8qi3"
+  [(set (match_operand:V8QI 0 "register_operand")
+(umaxmin:V8QI
+ (match_operand:V8QI 1 "register_operand")
+ (match_operand:V8QI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, V8QImode, operands);")
 
 (define_insn "*mmx_v8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
 (umaxmin:V8QI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+ (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (, V8QImode, operands)"
-  "pb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   pb\t{%2, %0|%0, %2}
+   pb\t{%2, %0|%0, %2}
+   vpb\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_ashr3"
   [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
-- 
2.20.1



[PATCH 16/42] i386: Emulate MMX mmx_pextrw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_pextrw with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_pextrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 43f85064cd9..22547c7da6f 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1310,16 +1310,20 @@
(set_attr "mode" "DI")])
 
 (define_insn "mmx_pextrw"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
 (zero_extend:SI
  (vec_select:HI
-   (match_operand:V4HI 1 "register_operand" "y")
-   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "mmxcvt")
+   (match_operand:V4HI 1 "register_operand" "y,Yv")
+   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   pextrw\t{%2, %1, %0|%0, %1, %2}
+   %vpextrw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog1")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_pshufw"
   [(match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 11/42] i386: Emulate MMX mmx_eq/mmx_gt3 with SSE

2019-02-15 Thread H.J. Lu
Emulate the MMX mmx_eq and mmx_gt compare patterns with SSE.  Only SSE
register source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_eq3): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_eq3): Also allow TARGET_MMX_WITH_SSE.  Add SSE
support.
(mmx_gt3): Likewise.
---
 gcc/config/i386/mmx.md | 43 +-
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 510d453f0fd..b0c6a8c8077 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1017,30 +1017,39 @@
 (define_expand "mmx_eq3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
 (eq:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand")
- (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (EQ, mode, operands);")
 
 (define_insn "*mmx_eq3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
 (eq:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (EQ, mode, operands)"
-  "pcmpeq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (EQ, mode, operands)"
+  "@
+   pcmpeq\t{%2, %0|%0, %2}
+   pcmpeq\t{%2, %0|%0, %2}
+   vpcmpeq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_gt3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
 (gt:MMXMODEI
- (match_operand:MMXMODEI 1 "register_operand" "0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpgt\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pcmpgt\t{%2, %0|%0, %2}
+   pcmpgt\t{%2, %0|%0, %2}
+   vpcmpgt\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+   (set_attr "mode" "DI,TI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 10/42] i386: Emulate MMX mmx_andnot3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_andnot3 with SSE.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/mmx.md (mmx_andnot3): Also allow
TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 7e2d40313c3..510d453f0fd 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1049,14 +1049,18 @@
 ;
 
 (define_insn "mmx_andnot3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(and:MMXMODEI
- (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv"))
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pandn\t{%2, %0|%0, %2}
+   pandn\t{%2, %0|%0, %2}
+   vpandn\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
-- 
2.20.1



[PATCH 07/42] i386: Emulate MMX mmx_pmaddwd with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX pmaddwd with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.
(*mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 3a7964d52bb..9f0311badca 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -810,11 +810,11 @@
  (mult:V2SI
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 1 "nonimmediate_operand")
+   (match_operand:V4HI 1 "register_mmxmem_operand")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 2 "nonimmediate_operand")
+   (match_operand:V4HI 2 "register_mmxmem_operand")
(parallel [(const_int 0) (const_int 2)]
  (mult:V2SI
(sign_extend:V2SI
@@ -823,20 +823,20 @@
(sign_extend:V2SI
  (vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_pmaddwd"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
 (plus:V2SI
  (mult:V2SI
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+   (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
(parallel [(const_int 0) (const_int 2)]
  (mult:V2SI
(sign_extend:V2SI
@@ -845,10 +845,15 @@
(sign_extend:V2SI
  (vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmaddwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmaddwd\t{%2, %0|%0, %2}
+   pmaddwd\t{%2, %0|%0, %2}
+   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pmulhrwv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 09/42] i386: Emulate MMX 3 with SSE

2019-02-15 Thread H.J. Lu
Emulate the MMX any_logic patterns with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (any_logic:mmx_3): Also allow
TARGET_MMX_WITH_SSE.
(any_logic:3): New.
(any_logic:*mmx_3): Also allow TARGET_MMX_WITH_SSE.
Add SSE support.
---
 gcc/config/i386/mmx.md | 33 +++--
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 240e0188a78..7e2d40313c3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1061,20 +1061,33 @@
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
(any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand")
- (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
+(define_expand "3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+   (any_logic:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_operand")
+ (match_operand:MMXMODEI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
 (any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (, mode, operands)"
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 08/42] i386: Emulate MMX ashr3/3 with SSE

2019-02-15 Thread H.J. Lu
Emulate the MMX arithmetic right shift (ashiftrt) and any_lshift patterns
with SSE.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_ashr3): Also allow
TARGET_MMX_WITH_SSE.  Add SSE emulation.
(mmx_3): Likewise.
(ashr3): New.
(3): Likewise.
---
 gcc/config/i386/mmx.md | 50 ++
 1 file changed, 36 insertions(+), 14 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9f0311badca..240e0188a78 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -959,32 +959,54 @@
(set_attr "mode" "DI")])
 
 (define_insn "mmx_ashr<mode>3"
-  [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv")
 (ashiftrt:MMXMODE24
- (match_operand:MMXMODE24 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
-  "psra<mmxvecsize>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
+ (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   psra<mmxvecsize>\t{%2, %0|%0, %2}
+   psra<mmxvecsize>\t{%2, %0|%0, %2}
+   vpsra<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseishft,sseishft")
(set (attr "length_immediate")
  (if_then_else (match_operand 2 "const_int_operand")
(const_string "1")
(const_string "0")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
+
+(define_expand "ashr<mode>3"
+  [(set (match_operand:MMXMODE24 0 "register_operand")
+(ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+  "TARGET_MMX_WITH_SSE")
 
 (define_insn "mmx_<shift_insn><mode>3"
-  [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv")
 (any_lshift:MMXMODE248
- (match_operand:MMXMODE248 1 "register_operand" "0")
- (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
-  "p<vshift><mmxvecsize>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
+ (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
+   p<vshift><mmxvecsize>\t{%2, %0|%0, %2}
+   vp<vshift><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseishft,sseishft")
(set (attr "length_immediate")
  (if_then_else (match_operand 2 "const_int_operand")
(const_string "1")
(const_string "0")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
+
+(define_expand "<shift_insn><mode>3"
+  [(set (match_operand:MMXMODE248 0 "register_operand")
+(any_lshift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand")
+ (match_operand:DI 2 "nonmemory_operand")))]
+  "TARGET_MMX_WITH_SSE")
 
 ;
 ;;
-- 
2.20.1



[PATCH 02/42] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2

2019-02-15 Thread H.J. Lu
Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb
plus moving bits 64:95 to bits 32:63 in SSE register.  Only SSE register
source operand is allowed.

2019-02-08  H.J. Lu  
Uros Bizjak  

PR target/89021
* config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
New prototype.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
function.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.md (mmx_isa): New.
(enabled): Also check mmx_isa.
* config/i386/mmx.md (any_s_truncate): New code iterator.
(s_trunsuffix): New code attr.
(mmx_packsswb): Removed.
(mmx_packssdw): Likewise.
(mmx_packuswb): Likewise.
(mmx_packswb): New define_insn_and_split to emulate
MMX packsswb/packuswb with SSE2.
(mmx_packssdw): Likewise.
* config/i386/predicates.md (register_mmxmem_operand): New.
---
 gcc/config/i386/i386-protos.h |  3 ++
 gcc/config/i386/i386.c| 54 
 gcc/config/i386/i386.md   | 13 +++
 gcc/config/i386/mmx.md| 67 +++
 gcc/config/i386/predicates.md |  7 
 5 files changed, 114 insertions(+), 30 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 27f5cc13abf..a53b48438ec 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -202,6 +202,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, 
rtx, rtx);
 
 extern rtx ix86_split_stack_guard (void);
 
+extern void ix86_move_vector_high_sse_to_mmx (rtx);
+extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 #endif /* TREE_CODE  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7d7dd80930e..d31b69d9a82 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20221,6 +20221,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, 
rtx operands[])
 gcc_unreachable ();
 }
 
+/* Move bits 64:95 to bits 32:63.  */
+
+void
+ix86_move_vector_high_sse_to_mmx (rtx op)
+{
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (0), GEN_INT (2),
+ GEN_INT (0), GEN_INT (0)));
+  rtx dest = lowpart_subreg (V4SImode, op, GET_MODE (op));
+  op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  rtx insn = gen_rtx_SET (dest, op);
+  emit_insn (insn);
+}
+
+/* Split MMX pack with signed/unsigned saturation with SSE/SSE2.  */
+
+void
+ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+
+  machine_mode dmode = GET_MODE (op0);
+  machine_mode smode = GET_MODE (op1);
+  machine_mode inner_dmode = GET_MODE_INNER (dmode);
+  machine_mode inner_smode = GET_MODE_INNER (smode);
+
+  /* Get the corresponding SSE mode for destination.  */
+  int nunits = 16 / GET_MODE_SIZE (inner_dmode);
+  machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+   nunits).require ();
+  machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+nunits / 2).require ();
+
+  /* Get the corresponding SSE mode for source.  */
+  nunits = 16 / GET_MODE_SIZE (inner_smode);
+  machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
+   nunits).require ();
+
+  /* Generate SSE pack with signed/unsigned saturation.  */
+  rtx dest = lowpart_subreg (sse_dmode, op0, GET_MODE (op0));
+  op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1));
+  op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2));
+
+  op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+  op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+  rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
+   op1, op2));
+  emit_insn (insn);
+
+  ix86_move_vector_high_sse_to_mmx (op0);
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 40ed93dc804..e1727676deb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -792,6 +792,10 @@
avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
 
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
+  (const_string "base"))
+
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
 (eq_attr "isa" "x64_sse2")
@@ -830,6 +834,15 @@
 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
 (eq_attr "isa" "avx512vl") 

[PATCH 04/42] i386: Emulate MMX plusminus/sat_plusminus with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX plusminus/sat_plusminus with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (MMXMODEI8): Require TARGET_SSE2 for V1DI.
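(plusminus:mmx_<plusminus_insn><mode>3): Check
TARGET_MMX_WITH_SSE.
(sat_plusminus:mmx_<plusminus_insn><mode>3): Likewise.
(<plusminus_insn><mode>3): New.
(*mmx_<plusminus_insn><mode>3): Add SSE emulation.
(*mmx_<plusminus_insn><mode>3): Likewise.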
---
 gcc/config/i386/mmx.md | 59 +++---
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 34fecd6a745..517c3283963 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -45,7 +45,7 @@
 
 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
-(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
 
 ;; All 8-byte vector modes handled by MMX
 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
@@ -663,39 +663,56 @@
 (define_expand "mmx_<plusminus_insn><mode>3"
   [(set (match_operand:MMXMODEI8 0 "register_operand")
(plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand")))]
-  "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
+ (match_operand:MMXMODEI8 1 "register_mmxmem_operand")
+ (match_operand:MMXMODEI8 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_expand "<plusminus_insn><mode>3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+   (plusminus:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_operand")
+ (match_operand:MMXMODEI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*mmx_<plusminus_insn><mode>3"
-  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
 (plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand" "0")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+ (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "0,0,Yv")
+ (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_<plusminus_insn><mode>3"
   [(set (match_operand:MMXMODE12 0 "register_operand")
(sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "nonimmediate_operand")
- (match_operand:MMXMODE12 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+ (match_operand:MMXMODE12 1 "register_mmxmem_operand")
+ (match_operand:MMXMODE12 2 "register_mmxmem_operand")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*mmx_<plusminus_insn><mode>3"
-  [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv")
 (sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "nonimmediate_operand" "0")
- (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODE12 1 "register_mmxmem_operand" "0,0,Yv")
+ (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
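+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "@
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"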
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 01/42] i386: Allow MMX register modes in SSE registers

2019-02-15 Thread H.J. Lu
In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support MMX register modes.

PR target/89021
* config/i386/i386-c.c (ix86_target_macros_internal): Define
__MMX_WITH_SSE__ for TARGET_MMX_WITH_SSE.
* config/i386/i386.c (ix86_set_reg_reg_cost): Add support for
TARGET_MMX_WITH_SSE with VALID_MMX_REG_MODE.
(ix86_vector_mode_supported_p): Likewise.
* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
---
 gcc/config/i386/i386-c.c | 2 ++
 gcc/config/i386/i386.c   | 5 +++--
 gcc/config/i386/i386.h   | 2 ++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 5e7e46fcebe..213e1b56c6b 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -548,6 +548,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
 def_or_undef (parse_in, "__CLDEMOTE__");
   if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
 def_or_undef (parse_in, "__PTWRITE__");
+  if (TARGET_MMX_WITH_SSE)
+def_or_undef (parse_in, "__MMX_WITH_SSE__");
   if (TARGET_IAMCU)
 {
   def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3e5f52175d2..7d7dd80930e 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40490,7 +40490,8 @@ ix86_set_reg_reg_cost (machine_mode mode)
  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
- || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
+ || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && VALID_MMX_REG_MODE (mode)))
units = GET_MODE_SIZE (mode);
 }
 
@@ -44316,7 +44317,7 @@ ix86_vector_mode_supported_p (machine_mode mode)
 return true;
   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
 return true;
-  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
+  if ((TARGET_MMX ||TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
 return true;
   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
 return true;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 4fd8bc40a34..91b233022c2 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -201,6 +201,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
 #define TARGET_16BIT   TARGET_CODE16
 #define TARGET_16BIT_P(x)  TARGET_CODE16_P(x)
 
+#define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)
+
 #include "config/vxworks-dummy.h"
 
 #include "config/i386/i386-opts.h"
-- 
2.20.1



[PATCH 00/42] V7: Emulate MMX intrinsics with SSE

2019-02-15 Thread H.J. Lu
On x86-64, since __m64 is returned and passed in XMM registers, we can
emulate MMX intrinsics with SSE instructions. To support it, we added

 #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)

;; Define instruction set of MMX instructions
(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
  (const_string "base"))

 (eq_attr "mmx_isa" "native")
   (symbol_ref "!TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64")
   (symbol_ref "TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64_avx")
   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
 (eq_attr "mmx_isa" "x64_noavx")
   (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")

We added SSE emulation to MMX patterns and disabled MMX alternatives with
TARGET_MMX_WITH_SSE.

Most MMX instructions have equivalent SSE versions, and the results of some
SSE versions need to be reshuffled to the right order for MMX.  There are
a couple of tricky cases:

1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
mask operand and handle unmapped bits 64:127 at memory address by
adjusting source and mask operands together with memory address.

2. MMX movntq is emulated with SSE2 DImode movnti, which is available
in 64-bit mode.

3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
SSE emulation must clear the bit 4 in the shuffle control mask.

4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve
the upper 64 bits of destination XMM register.

Tests are also added to check each SSE emulation of MMX intrinsics.

There are no regressions on i686 and x86-64.  For x86-64, GCC is also
tested with

--with-arch=native --with-cpu=native

on AVX2 and AVX512F machines.

H.J. Lu (41):
  i386: Allow MMX register modes in SSE registers
  i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
  i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
  i386: Emulate MMX plusminus/sat_plusminus with SSE
  i386: Emulate MMX mulv4hi3 with SSE
  i386: Emulate MMX smulv4hi3_highpart with SSE
  i386: Emulate MMX mmx_pmaddwd with SSE
  i386: Emulate MMX ashr<mode>3/<shift_insn><mode>3 with SSE
  i386: Emulate MMX 3 with SSE
  i386: Emulate MMX mmx_andnot3 with SSE
  i386: Emulate MMX mmx_eq/mmx_gt3 with SSE
  i386: Emulate MMX vec_dupv2si with SSE
  i386: Emulate MMX pshufw with SSE
  i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
  i386: Emulate MMX sse_cvtpi2ps with SSE
  i386: Emulate MMX mmx_pextrw with SSE
  i386: Emulate MMX mmx_pinsrw with SSE
  i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
  i386: Emulate MMX mmx_pmovmskb with SSE
  i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
  i386: Emulate MMX maskmovq with SSE2 maskmovdqu
  i386: Emulate MMX mmx_uavgv8qi3 with SSE
  i386: Emulate MMX mmx_uavgv4hi3 with SSE
  i386: Emulate MMX mmx_psadbw with SSE
  i386: Emulate MMX movntq with SSE2 movntidi
  i386: Emulate MMX umulv1siv1di3 with SSE2
  i386: Make _mm_empty () as NOP when MMX is disabled
  i386: Emulate MMX ssse3_phwv4hi3 with SSE
  i386: Emulate MMX ssse3_phdv2si3 with SSE
  i386: Emulate MMX ssse3_pmaddubsw with SSE
  i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
  i386: Emulate MMX pshufb with SSE version
  i386: Emulate MMX ssse3_psign3 with SSE
  i386: Emulate MMX ssse3_palignrdi with SSE
  i386: Emulate MMX abs2 with SSE
  i386: Correct _pmulhrsw3[_mask]
  i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
  i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
  i386: Allow MMX intrinsic emulation with SSE
  i386: Enable TM MMX intrinsics with SSE2
  i386: Add tests for MMX intrinsic emulations with SSE

Uros Bizjak (1):
  Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE

 gcc/config/i386/constraints.md|   6 +
 gcc/config/i386/i386-builtin.def  | 126 +--
 gcc/config/i386/i386-c.c  |   2 +
 gcc/config/i386/i386-protos.h |   4 +
 gcc/config/i386/i386.c| 189 +++-
 gcc/config/i386/i386.h|   2 +
 gcc/config/i386/i386.md   |  17 +
 gcc/config/i386/mmintrin.h|  12 +-
 gcc/config/i386/mmx.md| 986 --
 gcc/config/i386/predicates.md |  14 +
 gcc/config/i386/sse.md| 368 +--
 gcc/config/i386/xmmintrin.h   |  61 ++
 gcc/testsuite/gcc.target/i386/mmx-vals.h  |  77 ++
 gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c   |  43 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c   |  39 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c   |  42 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c   |  40 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c   |  31 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c   |  36 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-16.c   

[PATCH 03/42] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX

2019-02-15 Thread H.J. Lu
Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX.  For MMX punpckhXX,
move bits 64:127 to bits 0:63 in SSE register.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
prototype.
* config/i386/i386.c (ix86_split_mmx_punpck): New function.
* config/i386/mmx.md (mmx_punpckhbw): Changed to
define_insn_and_split to support SSE emulation.
(mmx_punpcklbw): Likewise.
(mmx_punpckhwd): Likewise.
(mmx_punpcklwd): Likewise.
(mmx_punpckhdq): Likewise.
(mmx_punpckldq): Likewise.
---
 gcc/config/i386/i386-protos.h |   1 +
 gcc/config/i386/i386.c|  77 +++
 gcc/config/i386/mmx.md| 138 ++
 3 files changed, 168 insertions(+), 48 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index a53b48438ec..37581837a32 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -204,6 +204,7 @@ extern rtx ix86_split_stack_guard (void);
 
 extern void ix86_move_vector_high_sse_to_mmx (rtx);
 extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d31b69d9a82..a76c17beece 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20275,6 +20275,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code 
code)
   ix86_move_vector_high_sse_to_mmx (op0);
 }
 
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX.  */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  machine_mode mode = GET_MODE (op0);
+  rtx mask;
+  /* The corresponding SSE mode.  */
+  machine_mode sse_mode, double_sse_mode;
+
+  switch (mode)
+{
+case E_V8QImode:
+  sse_mode = V16QImode;
+  double_sse_mode = V32QImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (16,
+ GEN_INT (0), GEN_INT (16),
+ GEN_INT (1), GEN_INT (17),
+ GEN_INT (2), GEN_INT (18),
+ GEN_INT (3), GEN_INT (19),
+ GEN_INT (4), GEN_INT (20),
+ GEN_INT (5), GEN_INT (21),
+ GEN_INT (6), GEN_INT (22),
+ GEN_INT (7), GEN_INT (23)));
+  break;
+
+case E_V4HImode:
+  sse_mode = V8HImode;
+  double_sse_mode = V16HImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (8,
+ GEN_INT (0), GEN_INT (8),
+ GEN_INT (1), GEN_INT (9),
+ GEN_INT (2), GEN_INT (10),
+ GEN_INT (3), GEN_INT (11)));
+  break;
+
+case E_V2SImode:
+  sse_mode = V4SImode;
+  double_sse_mode = V8SImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4,
+ GEN_INT (0), GEN_INT (4),
+ GEN_INT (1), GEN_INT (5)));
+  break;
+
+default:
+  gcc_unreachable ();
+}
+
+  /* Generate SSE punpcklXX.  */
+  rtx dest = lowpart_subreg (sse_mode, op0, GET_MODE (op0));
+  op1 = lowpart_subreg (sse_mode, op1, GET_MODE (op1));
+  op2 = lowpart_subreg (sse_mode, op2, GET_MODE (op2));
+
+  op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
+  op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
+  rtx insn = gen_rtx_SET (dest, op2);
+  emit_insn (insn);
+
+  if (high_p)
+{
+  /* Move bits 64:127 to bits 0:63.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (0)));
+  dest = lowpart_subreg (V4SImode, dest, GET_MODE (dest));
+  op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  insn = gen_rtx_SET (dest, op1);
+  emit_insn (insn);
+}
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index dbb2baa74d7..34fecd6a745 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1064,87 +1064,129 @@
(set_attr "type" "mmxshft,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+  [(set (match_operand:V8QI 0 

Re: Fortran vector math header

2019-02-15 Thread Steve Kargl
On Tue, Feb 05, 2019 at 01:47:57PM +0100, Martin Liška wrote:
> 
> gcc/fortran/ChangeLog:
> 
> 2019-01-24  Martin Liska  
> 
>   * decl.c (gfc_match_gcc_builtin): Add support for filtering
>   of builtin directive based on multilib ABI name.
> 
> gcc/testsuite/ChangeLog:
> 
> 2019-01-24  Martin Liska  
> 
>   * gfortran.dg/simd-builtins-7.f90: New test.
>   * gfortran.dg/simd-builtins-7.h: New test.

The Fortran bits look ok to me.

-- 
steve


Go patch committed: Use __builtin_dwarf_cfa for getcallersp

2019-02-15 Thread Ian Lance Taylor
This patch by Cherry Zhang changes the Go compiler and runtime to use
__builtin_dwarf_cfa for getcallersp.  Currently, the compiler lowers
runtime.getcallersp to __builtin_frame_address(1).  In the C side of
the runtime, getcallersp is defined as __builtin_frame_address(0).
They don't match.  Further, neither of them actually returns the
caller's SP.  On x86_64, __builtin_frame_address(0) just returns the
frame pointer.  __builtin_frame_address(1) returns the memory content
where the frame pointer points to, which is typically the caller's
frame pointer but can also be garbage if the frame pointer is not
enabled.

This patch changes getcallersp to use __builtin_dwarf_cfa(), which
returns the caller's SP at the call site.  This matches the SP we get
from unwinding the stack.

Currently getcallersp is not used for anything real. It will be used
for precise stack scan.

Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian


2019-02-15  Cherry Zhang  

* go-gcc.cc (Gcc_backend::Gcc_backend): Define __builtin_dwarf_cfa
instead of __builtin_frame_address.
Index: gcc/go/go-gcc.cc
===
--- gcc/go/go-gcc.cc(revision 268369)
+++ gcc/go/go-gcc.cc(working copy)
@@ -734,8 +734,9 @@ Gcc_backend::Gcc_backend()
   this->define_builtin(BUILT_IN_RETURN_ADDRESS, "__builtin_return_address",
   NULL, t, false, false);
 
-  // The runtime calls __builtin_frame_address for runtime.getcallersp.
-  this->define_builtin(BUILT_IN_FRAME_ADDRESS, "__builtin_frame_address",
+  // The runtime calls __builtin_dwarf_cfa for runtime.getcallersp.
+  t = build_function_type_list(ptr_type_node, NULL_TREE);
+  this->define_builtin(BUILT_IN_DWARF_CFA, "__builtin_dwarf_cfa",
   NULL, t, false, false);
 
   // The runtime calls __builtin_extract_return_addr when recording
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 268948)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-1a74b8a22b2ff7f430729aa87ecb8cea7b5cdd70
+9605c2efd99aa9c744652a9153e208e0653b8596
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/expressions.cc
===
--- gcc/go/gofrontend/expressions.cc(revision 268923)
+++ gcc/go/gofrontend/expressions.cc(working copy)
@@ -9903,17 +9903,18 @@ Call_expression::do_lower(Gogo* gogo, Na
  && n == "getcallerpc")
{
  static Named_object* builtin_return_address;
+  int arg = 0;
  return this->lower_to_builtin(&builtin_return_address,
"__builtin_return_address",
-   0);
+   &arg);
}
  else if ((this->args_ == NULL || this->args_->size() == 0)
   && n == "getcallersp")
{
- static Named_object* builtin_frame_address;
- return this->lower_to_builtin(&builtin_frame_address,
-   "__builtin_frame_address",
-   1);
+ static Named_object* builtin_dwarf_cfa;
+ return this->lower_to_builtin(&builtin_dwarf_cfa,
+   "__builtin_dwarf_cfa",
+   NULL);
}
}
 }
@@ -10031,21 +10032,24 @@ Call_expression::lower_varargs(Gogo* gog
   this->varargs_are_lowered_ = true;
 }
 
-// Return a call to __builtin_return_address or __builtin_frame_address.
+// Return a call to __builtin_return_address or __builtin_dwarf_cfa.
 
 Expression*
 Call_expression::lower_to_builtin(Named_object** pno, const char* name,
- int arg)
+ int* arg)
 {
   if (*pno == NULL)
-*pno = Gogo::declare_builtin_rf_address(name);
+*pno = Gogo::declare_builtin_rf_address(name, arg != NULL);
 
   Location loc = this->location();
 
   Expression* fn = Expression::make_func_reference(*pno, NULL, loc);
-  Expression* a = Expression::make_integer_ul(arg, NULL, loc);
   Expression_list *args = new Expression_list();
-  args->push_back(a);
+  if (arg != NULL)
+{
+  Expression* a = Expression::make_integer_ul(*arg, NULL, loc);
+  args->push_back(a);
+}
   Expression* call = Expression::make_call(fn, args, false, loc);
 
   // The builtin functions return void*, but the Go functions return uintptr.
Index: gcc/go/gofrontend/expressions.h
===
--- gcc/go/gofrontend/expressions.h (revision 268369)
+++ gcc/go/gofrontend/expressions.h (working copy)
@@ -2356,7 +2356,7 @@ class Call_expression : public Expressio
   

[testsuite] Couple of g++.dg/asan tweaks

2019-02-15 Thread Eric Botcazou
One of the tests in g++.dg/asan/asan_oob_test.cc uses unaligned memory 
accesses and g++.dg/asan/function-argument-3.C assumes a specific kind of 
calling conventions for vectors.

Tested on SPARC64/Linux, applied on the mainline.


2019-02-15  Eric Botcazou  

* g++.dg/asan/asan_oob_test.cc: Skip OOB_int on SPARC.
* g++.dg/asan/function-argument-3.C: Tweak for 32-bit SPARC.

-- 
Eric Botcazou
Index: g++.dg/asan/asan_oob_test.cc
===
--- g++.dg/asan/asan_oob_test.cc	(revision 268849)
+++ g++.dg/asan/asan_oob_test.cc	(working copy)
@@ -68,9 +68,13 @@ TEST(AddressSanitizer, OOB_char) {
   OOBTest<char>();
 }
 
+// The following test uses unaligned memory accesses
+
+#if !defined(__sparc__)
 TEST(AddressSanitizer, OOB_int) {
   OOBTest<int>();
 }
+#endif
 
 TEST(AddressSanitizer, OOBRightTest) {
   for (size_t access_size = 1; access_size <= 8; access_size *= 2) {
Index: g++.dg/asan/function-argument-3.C
===
--- g++.dg/asan/function-argument-3.C	(revision 268849)
+++ g++.dg/asan/function-argument-3.C	(working copy)
@@ -2,7 +2,16 @@
 // { dg-shouldfail "asan" }
 // { dg-additional-options "-Wno-psabi" }
 
+// On SPARC 32-bit, only vectors up to 8 bytes are passed in registers
+#if defined(__sparc__) && !defined(__sparcv9) && !defined(__arch64__)
+#define SMALL_VECTOR
+#endif
+
+#ifdef SMALL_VECTOR
+typedef int v4si __attribute__ ((vector_size (8)));
+#else
 typedef int v4si __attribute__ ((vector_size (16)));
+#endif
 
 static __attribute__ ((noinline)) int
 goo (v4si *a)
@@ -19,10 +28,14 @@ foo (v4si arg)
 int
 main ()
 {
+#ifdef SMALL_VECTOR
+  v4si v = {1,2};
+#else
   v4si v = {1,2,3,4};
+#endif
   return foo (v);
 }
 
 // { dg-output "ERROR: AddressSanitizer: stack-buffer-overflow on address.*(\n|\r\n|\r)" }
 // { dg-output "READ of size . at.*" }
-// { dg-output ".*'arg' \\(line 14\\) <== Memory access at offset \[0-9\]* overflows this variable.*" }
+// { dg-output ".*'arg' \\(line 23\\) <== Memory access at offset \[0-9\]* overflows this variable.*" }


[SPARC] Small ASAN fixes

2019-02-15 Thread Eric Botcazou
This automatically passes -funwind-tables when ASAN is used on Linux, as done 
for other architectures, and also adjusts the shadow offset in 64-bit mode.

Tested on SPARC64/Linux, applied on the mainline.


2019-02-15  Eric Botcazou  

* config/sparc/linux.h (ASAN_CC1_SPEC): Define.
(CC1_SPEC): Use GNU_USER_TARGET_CC1_SPEC and ASAN_CC1_SPEC.
* config/sparc/linux64.h (ASAN_CC1_SPEC): Likewise.
(CC1_SPEC): Likewise.
* config/sparc/sparc.c (sparc_asan_shadow_offset): Adjust for 64-bit.

-- 
Eric Botcazou
Index: config/sparc/linux.h
===
--- config/sparc/linux.h	(revision 268849)
+++ config/sparc/linux.h	(working copy)
@@ -54,10 +54,11 @@ extern const char *host_detect_local_cpu
 
 #define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS
 
-/* This is for -profile to use -lc_p instead of -lc.  */
-#undef	CC1_SPEC
-#define	CC1_SPEC "%{profile:-p} \
-"
+#undef  ASAN_CC1_SPEC
+#define ASAN_CC1_SPEC "%{%:sanitize(address):-funwind-tables}"
+
+#undef  CC1_SPEC
+#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC ASAN_CC1_SPEC
 
 #undef SIZE_TYPE
 #define SIZE_TYPE "unsigned int"
Index: config/sparc/linux64.h
===
--- config/sparc/linux64.h	(revision 268849)
+++ config/sparc/linux64.h	(working copy)
@@ -143,24 +143,25 @@ extern const char *host_detect_local_cpu
 
 #define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS
 
-#undef	CC1_SPEC
+#undef  ASAN_CC1_SPEC
+#define ASAN_CC1_SPEC "%{%:sanitize(address):-funwind-tables}"
+
+#undef  CC1_SPEC
 #if DEFAULT_ARCH32_P
-#define CC1_SPEC "%{profile:-p} \
-%{m32:%{m64:%emay not use both -m32 and -m64}} \
+#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC ASAN_CC1_SPEC \
+"%{m32:%{m64:%emay not use both -m32 and -m64}} \
 %{m64:-mptr64 -mstack-bias -mlong-double-128 \
   %{!mcpu*:-mcpu=ultrasparc} \
-  %{!mno-vis:%{!mcpu=v9:-mvis}}} \
-"
+  %{!mno-vis:%{!mcpu=v9:-mvis}}}"
 #else
-#define CC1_SPEC "%{profile:-p} \
-%{m32:%{m64:%emay not use both -m32 and -m64}} \
+#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC ASAN_CC1_SPEC \
+"%{m32:%{m64:%emay not use both -m32 and -m64}} \
 %{m32:-mptr32 -mno-stack-bias %{!mlong-double-128:-mlong-double-64} \
   %{!mcpu*:-mcpu=cypress}} \
 %{mv8plus:-mptr32 -mno-stack-bias %{!mlong-double-128:-mlong-double-64} \
   %{!mcpu*:-mcpu=v9}} \
 %{!m32:%{!mcpu*:-mcpu=ultrasparc}} \
-%{!mno-vis:%{!m32:%{!mcpu=v9:-mvis}}} \
-"
+%{!mno-vis:%{!m32:%{!mcpu=v9:-mvis}}}"
 #endif
 
 /* Support for a compile-time default CPU, et cetera.  The rules are:
Index: config/sparc/sparc.c
===
--- config/sparc/sparc.c	(revision 268849)
+++ config/sparc/sparc.c	(working copy)
@@ -12524,7 +12524,7 @@ sparc_init_machine_status (void)
 static unsigned HOST_WIDE_INT
 sparc_asan_shadow_offset (void)
 {
-  return TARGET_ARCH64 ? HOST_WIDE_INT_C (0x7fff8000) : (HOST_WIDE_INT_1 << 29);
+  return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
 }
 
 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.


Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)

2019-02-15 Thread Martin Sebor

On 2/15/19 3:46 PM, Eric Botcazou wrote:

> > I'm ready to commit the patch once it's approved, and have been since
> > the day the problem was reported.


> Maybe CCing whoever approved the previous patch would help?


I just pinged the patch a few minutes ago and CC'd Jason.  Sorry
about any trouble this has caused.

Martin


Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)

2019-02-15 Thread Eric Botcazou
> I'm ready to commit the patch once it's approved, and have been since
> the day the problem was reported.

Maybe CCing whoever approved the previous patch would help?

-- 
Eric Botcazou


Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)

2019-02-15 Thread Martin Sebor

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-02/msg00857.html

Jason, since you approved the original patch, can you please also
review this one?  Due to the Ada test breakage there seems to be
some anxiety about getting the problem corrected soon.

Thanks
Martin

On 2/11/19 6:13 PM, Martin Sebor wrote:

The attached patch removes the assumption introduced earlier today
in my fix for bug 87996 that the valid_constant_size_p argument is
a constant expression.  I couldn't come up with a C/C++ test case
where this isn't true but apparently it can happen in Ada which I
inadvertently didn't build.  I still haven't figured out what
I have to do to build it on my Fedora 29 machine so I tested
this change by hand (besides bootstrapping w/o Ada).

The first set of instructions Google gives me don't seem to do
it:

   https://fedoraproject.org/wiki/Features/Ada_developer_tools

and neither does dnf install gcc-gnat as explained on our Wiki:

   https://gcc.gnu.org/wiki/GNAT

If someone knows the magic chant I would be grateful (it might
be helpful to also update the Wiki page -- the last change to
it was made in 2012; I volunteer to do that).

Martin




[PATCH, og8] Don't rescan "attach" node for dereferenced struct member

2019-02-15 Thread Julian Brown
Hi,

The following (og8 branch) patch added support for
attaching/detaching from dereferenced struct members:

https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01778.html

Unfortunately I made a mistake in the portion of that patch that
inserts new alloc and firstprivate_pointer nodes for the struct base,
meaning that the node rewritten to an attach operation would be
scanned again. This is both unnecessary, and can cause problems in some
circumstances.

Tested with offloading to nvptx, no regressions and the new test passes.
I will apply (to the og8 branch) shortly.

Thanks,

Julian

ChangeLog

gcc/
* gimplify.c (gimplify_scan_omp_clauses): Avoid scanning
'c' again after creating base-pointer nodes for
dereferenced struct.

gcc/testsuite/
* gfortran.dg/goacc/derived-types-2.f90: New.
commit e374d415801588435d62ac214e0313ffd3ef2198
Author: Julian Brown 
Date:   Thu Feb 14 16:40:21 2019 -0800

[og8] Don't rescan "attach" node for dereferenced struct member

gcc/
* gimplify.c (gimplify_scan_omp_clauses): Avoid scanning 'c' again
after creating base-pointer nodes for dereferenced struct.

gcc/testsuite/
* gfortran.dg/goacc/derived-types-2.f90: New.

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 8bf11eb659e..2ff5b68e0cc 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -8289,8 +8289,6 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
 		  *list_p = c2;
 		  OMP_CLAUSE_CHAIN (c2) = c3;
 		  OMP_CLAUSE_CHAIN (c3) = c;
-		  c = c3;
-		  list_p = &OMP_CLAUSE_CHAIN (c3);
 
 		  struct_deref_set->add (decl);
 		}
diff --git a/gcc/testsuite/gfortran.dg/goacc/derived-types-2.f90 b/gcc/testsuite/gfortran.dg/goacc/derived-types-2.f90
new file mode 100644
index 000..d01583fac89
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/derived-types-2.f90
@@ -0,0 +1,14 @@
+module bar
+  type :: type1
+ real(8), pointer, public :: p(:) => null()
+  end type
+  type :: type2
+ class(type1), pointer :: p => null()
+  end type
+end module
+
+subroutine foo (var)
+   use bar
+   type(type2), intent(inout) :: var
+   !$acc enter data create(var%p%p)
+end subroutine


Re: [patch] Disable store merging in asan_expand_mark_ifn

2019-02-15 Thread Eric Botcazou
> > OK, revised patch attached.  I have manually verified that it yields the
> > expected result for an array of long doubles on 64-bit SPARC.
> > 
> > 
> > 2019-02-12  Eric Botcazou  
> > 
> > * asan.c (asan_expand_mark_ifn): Take into account the alignment of
> > the object to pick the size of stores on strict-alignment platforms.
> 
> Ok, thanks.

Glad you insisted in the end, because I have ASAN working on SPARC64/Linux, 
but only after fixing another bug on 64-bit strict-alignment platforms:

  /* Align base if target is STRICT_ALIGNMENT.  */
  if (STRICT_ALIGNMENT)
base = expand_binop (Pmode, and_optab, base,
 gen_int_mode (-((GET_MODE_ALIGNMENT (SImode)
  << ASAN_SHADOW_SHIFT)
 / BITS_PER_UNIT), Pmode), NULL_RTX,
 1, OPTAB_DIRECT);

GET_MODE_ALIGNMENT is unsigned int so this zero-extends to unsigned long...

Tested on 32-bit and 64-bit SPARC/Linux, applied on mainline as obvious.


2019-02-15  Eric Botcazou  

* asan.c (asan_emit_stack_protection): Use full-sized mask to align
the base address on 64-bit strict-alignment platforms.

-- 
Eric Botcazou
Index: asan.c
===
--- asan.c	(revision 268849)
+++ asan.c	(working copy)
@@ -1440,13 +1441,15 @@ asan_emit_stack_protection (rtx base, rt
 	base_align_bias = ((asan_frame_size + alignb - 1)
 			   & ~(alignb - HOST_WIDE_INT_1)) - asan_frame_size;
 }
+
   /* Align base if target is STRICT_ALIGNMENT.  */
   if (STRICT_ALIGNMENT)
-base = expand_binop (Pmode, and_optab, base,
-			 gen_int_mode (-((GET_MODE_ALIGNMENT (SImode)
-	  << ASAN_SHADOW_SHIFT)
-	 / BITS_PER_UNIT), Pmode), NULL_RTX,
-			 1, OPTAB_DIRECT);
+{
+  const HOST_WIDE_INT align
+	= (GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT) << ASAN_SHADOW_SHIFT;
+  base = expand_binop (Pmode, and_optab, base, gen_int_mode (-align, Pmode),
+			   NULL_RTX, 1, OPTAB_DIRECT);
+}
 
   if (use_after_return_class == -1 && pbase)
 emit_move_insn (pbase, base);
@@ -1534,7 +1548,7 @@ asan_emit_stack_protection (rtx base, rt
   shadow_mem = gen_rtx_MEM (SImode, shadow_base);
   set_mem_alias_set (shadow_mem, asan_shadow_set);
   if (STRICT_ALIGNMENT)
-set_mem_align (shadow_mem, (GET_MODE_ALIGNMENT (SImode)));
+set_mem_align (shadow_mem, GET_MODE_ALIGNMENT (SImode));
   prev_offset = base_offset;
 
   asan_redzone_buffer rz_buffer (shadow_mem, prev_offset);


Re: [PATCH] Avoid assuming valid_constant_size_p argument is a constant expression (PR 89294)

2019-02-15 Thread Martin Sebor

On 2/15/19 12:24 AM, Eric Botcazou wrote:

> > The attached patch removes the assumption introduced earlier today
> > in my fix for bug 87996 that the valid_constant_size_p argument is
> > a constant expression.  I couldn't come up with a C/C++ test case
> > where this isn't true but apparently it can happen in Ada which I
> > inadvertently didn't build.


> Can we do something here?  Our internal testers have been down for 3 days
> because of this blunder...


I'm ready to commit the patch once it's approved, and have been since
the day the problem was reported.


Martin




Go patch committed: Don't use a nil check for the write barrier

2019-02-15 Thread Ian Lance Taylor
This patch to the Go frontend by Than McIntosh tweaks the recipe for
generating writeBarrier loads to ensure that the dereference expr is
marked as not requiring a nil check.  This should fix gcc PR 89368.
Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 268941)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-0563f2d018cdb2cd685c254bac5ceb38396d0a27
+1a74b8a22b2ff7f430729aa87ecb8cea7b5cdd70
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: gcc/go/gofrontend/wb.cc
===
--- gcc/go/gofrontend/wb.cc (revision 268923)
+++ gcc/go/gofrontend/wb.cc (working copy)
@@ -904,7 +904,8 @@ Gogo::check_write_barrier(Block* enclosi
   ref = Expression::make_unary(OPERATOR_AND, ref, loc);
   ref = Expression::make_cast(unsafe_pointer_type, ref, loc);
   ref = Expression::make_cast(puint32_type, ref, loc);
-  ref = Expression::make_unary(OPERATOR_MULT, ref, loc);
+  ref = Expression::make_dereference(ref,
+ Expression::NIL_CHECK_NOT_NEEDED, loc);
   Expression* zero = Expression::make_integer_ul(0, ref->type(), loc);
   Expression* cond = Expression::make_binary(OPERATOR_EQEQ, ref, zero, loc);
 


[PR fortran/89077, patch, part 3] - ICE using * as len specifier for character parameter

2019-02-15 Thread Harald Anlauf
The attached patch is the third in a series for the above PR.
This one fixes erroneous padding with garbage characters in some
declaration and initialization expressions.

The issue here was that expr->representation is set when either
Hollerith strings are used or a TRANSFER statement is involved.
As a result, the original string could be used with trailing
garbage instead of the properly space-padded string.  The patch
simply clears expr->representation in that case.

Regtested on x86_64-pc-linux-gnu.

OK for trunk?

Thanks,
Harald

2019-02-15  Harald Anlauf  

PR fortran/89077
* decl.c (gfc_set_constant_character_len): Clear original string
representation after padding has been performed to target length.

2019-02-15  Harald Anlauf  

PR fortran/89077
* gfortran.dg/transfer_simplify_12.f90: New test.

Index: gcc/fortran/decl.c
===
--- gcc/fortran/decl.c  (revision 268946)
+++ gcc/fortran/decl.c  (working copy)
@@ -1754,6 +1754,14 @@
   free (expr->value.character.string);
   expr->value.character.string = s;
   expr->value.character.length = len;
+  /* If explicit representation was given, clear it
+as it is no longer needed after padding.  */
+  if (expr->representation.length)
+   {
+ expr->representation.length = 0;
+ free (expr->representation.string);
+ expr->representation.string = NULL;
+   }
 }
 }
 
Index: gcc/testsuite/gfortran.dg/transfer_simplify_12.f90
===
--- gcc/testsuite/gfortran.dg/transfer_simplify_12.f90  (nonexistent)
+++ gcc/testsuite/gfortran.dg/transfer_simplify_12.f90  (working copy)
@@ -0,0 +1,27 @@
+! { dg-do run }
+! { dg-options "-O -std=legacy" }
+!
+! Test fixes for some findings while resolving PR fortran/89077
+
+program test
+  implicit none
+  integer :: i
+  character(*)  ,parameter :: s =  'abcdef'   ! Length will be 6
+  character(*)  ,parameter :: h = 6Habcdef! Length will be 8 (Hollerith!)
+  character(10) ,parameter :: k = 6Habcdef
+  character(10) ,parameter :: t = transfer (s, s)
+  character(10) ,save  :: u = transfer (s, s)
+  character(10) ,parameter :: v = transfer (h, h)
+  character(10) ,save  :: w = transfer (h, h)
+  character(10) ,parameter :: x = transfer ([(s(i:i),i=len(s),1,-1)], s)
+  character(10) ,save  :: y = transfer ([(s(i:i),i=len(s),1,-1)], s)
+  if (len (h) /= 8) stop 1
+  if (h /= s) stop 2
+  if (k /= s) stop 3
+  if (t /= s) stop 4
+  if (u /= s) stop 5
+  if (v /= s) stop 6
+  if (w /= s) stop 7
+  if (x /= "fedcba") stop 8
+  if (y /= x) stop 9
+end program test


[PATCH] i386: Fix ')' in VALID_MMX_REG_MODE

2019-02-15 Thread H.J. Lu
Replace "(MODE == V1DImode)" with "(MODE) == V1DImode".

* config/i386/i386.h (VALID_MMX_REG_MODE): Correct the misplaced
')'.
---
 gcc/ChangeLog  | 5 +
 gcc/config/i386/i386.h | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d1083735e26..96f8679e8f9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2019-02-15  H.J. Lu  
+
+   * config/i386/i386.h (VALID_MMX_REG_MODE): Correct the misplaced
+   ')'.
+
 2019-02-15  Uroš Bizjak  
 
* config/i386/darwin.h (TARGET_FPMATH_DEFAULT_P): New define.
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index d9039060997..4fd8bc40a34 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1158,7 +1158,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
   ((MODE) == V2SFmode || (MODE) == SFmode)
 
 #define VALID_MMX_REG_MODE(MODE)   \
-  ((MODE == V1DImode) || (MODE) == DImode  \
+  ((MODE) == V1DImode || (MODE) == DImode  \
|| (MODE) == V2SImode || (MODE) == SImode   \
|| (MODE) == V4HImode || (MODE) == V8QImode)
 
-- 
2.20.1



Re: libgo patch committed: Add S/390 support to internal/cpu package

2019-02-15 Thread Matthias Klose
On 15.02.19 15:52, Ian Lance Taylor wrote:
> This patch by Robin Dapp adds S/390 support to the internal/cpu
> package.  This partially addresses PR 89123.  I bootstrapped it on
> x86_64-pc-linux-gnu, which means little.  Committed to mainline.

fails in the -m31 multilib variant with

libtool: compile:  /<>/build/./gcc/xgcc
-B/<>/build/./gcc/ -B/usr/s390x-linux-gnu/bin/
-B/usr/s390x-linux-gnu/lib/ -isystem /usr/s390x-linux-gnu/include -isystem
/usr/s390x-linux-gnu/sys-include -isys
tem /<>/build/sys-include -m31 -DHAVE_CONFIG_H -I.
-I../../../../src/libgo -I ../../../../src/libgo/
runtime -I../../../../src/libgo/../libffi/include -I../libffi/include -pthread
-L../libatomic/.libs -fexceptions
-fnon-call-exceptions -fno-stack-protector -fsplit-stack -Wall -Wextra
-Wwrite-strings -Wcast-qual -D_GNU_SOURCE
-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -I ../../../../src/libgo/../libgcc -I
../../../../src/libgo/../libback
trace -I ../../../gcc/include -g -O2 -m31 -c
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c  -fPIC -DPIC -o in
ternal/cpu/.libs/cpu_gccgo.o

../../../../src/libgo/go/internal/cpu/cpu_gccgo.c: Assembler messages:
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:91: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:105: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:119: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:134: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:149: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:164: Error: Unrecognized
opcode: `lghi'
../../../../src/libgo/go/internal/cpu/cpu_gccgo.c:179: Error: Unrecognized
opcode: `lghi'
make[10]: *** [Makefile:2899: internal/cpu/cpu_gccgo.lo] Error 1

make[10]: *** Waiting for unfinished jobs
make[10]: Leaving directory '/<>/build/s390x-linux-gnu/32/libgo'
make[9]: *** [Makefile:2242: all-recursive] Error 1
make[9]: Leaving directory '/<>/build/s390x-linux-gnu/32/libgo'
make[8]: *** [Makefile:1167: all] Error 2
make[8]: Leaving directory '/<>/build/s390x-linux-gnu/32/libgo'
make[7]: *** [Makefile:3062: multi-do] Error 1

using binutils 2.32


Re: [PATCH 00/40] V6: Emulate MMX intrinsics with SSE

2019-02-15 Thread Uros Bizjak
On Fri, Feb 15, 2019 at 7:20 PM H.J. Lu  wrote:
> > I went through the code again, and looks OK in general, modulo
> > mmx_nonimmediate_operand issue and a couple of minor issues.
> >
> > Please substitute nonimmediate_operand predicate with
> > mmx_nonimmediate_operand in expanders and insn patterns. Please note
>
> Can we keep nonimmediate_operand in expanders, like

No, expander should also be changed. The way expanders are called is -
if the operand can't satisfy the predicate, then move it to a
register. So, for TARGET_MMX_WITH_SSE, we allow memory operand which
isn't allowed by relevant insn pattern -> ICE.

There is nothing RA can do here. Operand type, produced by expander
must match predicate in the insn pattern to satisfy insn pattern.
Otherwise, the compiler will ICE way before RA comes into play. Also,
in the insn pattern, the constraints must allow a subset of an operand
predicate if we want RA to fixup the operand.

Uros.

> (define_expand "3"
>   [(set (match_operand:MMXMODEI 0 "register_operand")
> (plusminus:MMXMODEI
>   (match_operand:MMXMODEI 1 "nonimmediate_operand")
>   (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
>   "TARGET_MMX_WITH_SSE"
>   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
>
> (define_insn "*mmx_3"
>   [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
> (plusminus:MMXMODEI8
>   (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "0,0,Yv")
>   (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))]
>   "(TARGET_MMX || TARGET_MMX_WITH_SSE)
>&& ix86_binary_operator_ok (, mode, operands)"
>   "@
>p\t{%2, %0|%0, %2}
>p\t{%2, %0|%0, %2}
>vp\t{%2, %1, %0|%0, %1, %2}"
>   [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
>(set_attr "type" "mmxadd,sseadd,sseadd")
>(set_attr "mode" "DI,TI,TI")])
>
> Can RA do the right thing?
>
> > that the proposed convention is to name the operand
> > register_mmxmem_operand (c.f. register_ssemem_operand), so I suggest
> > we name the predicate in this way.
>
> I will rename it to register_mmxmem_operand.
>
> > There is an issue with a change to emms pattern.
> >
> > And let's remove _mm_empty () calls from testcases; they complicate
> > things too much for no apparent benefit.
>
> Will do.
>
> > With those issues fixed, the patchset is OK for gcc-10 when it opens.
> >
> > Uros.
> >
> > > H.J. Lu (41):
> > >   i386: Allow MMX register modes in SSE registers
> > >   i386: Add mmx_nonimmediate_operand
> > >   i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
> > >   i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
> > >   i386: Emulate MMX plusminus/sat_plusminus with SSE
> > >   i386: Emulate MMX mulv4hi3 with SSE
> > >   i386: Emulate MMX smulv4hi3_highpart with SSE
> > >   i386: Emulate MMX mmx_pmaddwd with SSE
> > >   i386: Emulate MMX ashr3/3 with SSE
> > >   i386: Emulate MMX 3 with SSE
> > >   i386: Emulate MMX mmx_andnot3 with SSE
> > >   i386: Emulate MMX mmx_eq/mmx_gt3 with SSE
> > >   i386: Emulate MMX vec_dupv2si with SSE
> > >   i386: Emulate MMX pshufw with SSE
> > >   i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
> > >   i386: Emulate MMX sse_cvtpi2ps with SSE
> > >   i386: Emulate MMX mmx_pextrw with SSE
> > >   i386: Emulate MMX mmx_pinsrw with SSE
> > >   i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
> > >   i386: Emulate MMX mmx_pmovmskb with SSE
> > >   i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
> > >   i386: Emulate MMX maskmovq with SSE2 maskmovdqu
> > >   i386: Emulate MMX mmx_uavgv8qi3 with SSE
> > >   i386: Emulate MMX mmx_uavgv4hi3 with SSE
> > >   i386: Emulate MMX mmx_psadbw with SSE
> > >   i386: Emulate MMX movntq with SSE2 movntidi
> > >   i386: Emulate MMX umulv1siv1di3 with SSE2
> > >   i386: Make _mm_empty () as NOP when MMX is disabled
> > >   i386: Emulate MMX ssse3_phwv4hi3 with SSE
> > >   i386: Emulate MMX ssse3_phdv2si3 with SSE
> > >   i386: Emulate MMX ssse3_pmaddubsw with SSE
> > >   i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
> > >   i386: Emulate MMX pshufb with SSE version
> > >   i386: Emulate MMX ssse3_psign3 with SSE
> > >   i386: Emulate MMX ssse3_palignrdi with SSE
> > >   i386: Emulate MMX abs2 with SSE
> > >   i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
> > >   i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
> > >   i386: Allow MMX intrinsic emulation with SSE
> > >   i386: Enable TM MMX intrinsics with SSE2
> > >   i386: Add tests for MMX intrinsic emulations with SSE
> > >
> > > Uros Bizjak (1):
> > >   Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE
> > >
> > >  gcc/config/i386/constraints.md|   6 +
> > >  gcc/config/i386/i386-builtin.def  | 126 +--
> > >  gcc/config/i386/i386-c.c  |   2 +
> > >  gcc/config/i386/i386-protos.h |   4 +
> > >  gcc/config/i386/i386.c| 189 +++-
> > >  gcc/config/i386/i386.h   

Re: [PATCH 00/40] V6: Emulate MMX intrinsics with SSE

2019-02-15 Thread H.J. Lu
On Fri, Feb 15, 2019 at 9:50 AM Uros Bizjak  wrote:
>
> On Fri, Feb 15, 2019 at 2:58 PM H.J. Lu  wrote:
> >
> > On x86-64, since __m64 is returned and passed in XMM registers, we can
> > emulate MMX intrinsics with SSE instructions. To support it, we added
> >
> >  #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)
> >
> > ;; Define instruction set of MMX instructions
> > (define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
> >   (const_string "base"))
> >
> >  (eq_attr "mmx_isa" "native")
> >(symbol_ref "!TARGET_MMX_WITH_SSE")
> >  (eq_attr "mmx_isa" "x64")
> >(symbol_ref "TARGET_MMX_WITH_SSE")
> >  (eq_attr "mmx_isa" "x64_avx")
> >(symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
> >  (eq_attr "mmx_isa" "x64_noavx")
> >(symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
> >
> > We added SSE emulation to MMX patterns and disabled MMX alternatives with
> > TARGET_MMX_WITH_SSE.
> >
> > Most of MMX instructions have equivalent SSE versions and results of some
> > SSE versions need to be reshuffled to the right order for MMX.  There are
> > a couple of tricky cases:
> >
> > 1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
> > maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
> > mask operand and handle unmapped bits 64:127 at memory address by
> > adjusting source and mask operands together with memory address.
> >
> > 2. MMX movntq is emulated with SSE2 DImode movnti, which is available
> > in 64-bit mode.
> >
> > 3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
> > SSE emulation must clear the bit 4 in the shuffle control mask.
> >
> > 4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve
> > the upper 64 bits of destination XMM register.
> >
> > Tests are also added to check each SSE emulation of MMX intrinsics.
> >
> > There are no regressions on i686 and x86-64.  For x86-64, GCC is also
> > tested with
> >
> > --with-arch=native --with-cpu=native
> >
> > on AVX2 and AVX512F machines.
>
> I went through the code again, and looks OK in general, modulo
> mmx_nonimmediate_operand issue and a couple of minor issues.
>
> Please substitute nonimmediate_operand predicate with
> mmx_nonimmediate_operand in expanders and insn patterns. Please note

Can we keep nonimmediate_operand in expanders, like

(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand")
(plusminus:MMXMODEI
  (match_operand:MMXMODEI 1 "nonimmediate_operand")
  (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
  "TARGET_MMX_WITH_SSE"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

(define_insn "*mmx_<plusminus_insn><mode>3"
  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
(plusminus:MMXMODEI8
  (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "0,0,Yv")
  (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
   (set_attr "type" "mmxadd,sseadd,sseadd")
   (set_attr "mode" "DI,TI,TI")])

Can RA do the right thing?

> that the proposed convention is to name the operand
> register_mmxmem_operand (c.f. register_ssemem_operand), so I suggest
> we name the predicate in this way.

I will rename it to register_mmxmem_operand.

> There is an issue with a change to emms pattern.
>
> And let's remove _mm_empty () calls from testcases; they complicate
> things too much for no apparent benefit.

Will do.

> With those issues fixed, the patchset is OK for gcc-10 when it opens.
>
> Uros.
>
> > H.J. Lu (41):
> >   i386: Allow MMX register modes in SSE registers
> >   i386: Add mmx_nonimmediate_operand
> >   i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
> >   i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
> >   i386: Emulate MMX plusminus/sat_plusminus with SSE
> >   i386: Emulate MMX mulv4hi3 with SSE
> >   i386: Emulate MMX smulv4hi3_highpart with SSE
> >   i386: Emulate MMX mmx_pmaddwd with SSE
> >   i386: Emulate MMX ashr3/3 with SSE
> >   i386: Emulate MMX 3 with SSE
> >   i386: Emulate MMX mmx_andnot3 with SSE
> >   i386: Emulate MMX mmx_eq/mmx_gt3 with SSE
> >   i386: Emulate MMX vec_dupv2si with SSE
> >   i386: Emulate MMX pshufw with SSE
> >   i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
> >   i386: Emulate MMX sse_cvtpi2ps with SSE
> >   i386: Emulate MMX mmx_pextrw with SSE
> >   i386: Emulate MMX mmx_pinsrw with SSE
> >   i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
> >   i386: Emulate MMX mmx_pmovmskb with SSE
> >   i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
> >   i386: Emulate MMX maskmovq with SSE2 maskmovdqu
> >   i386: Emulate MMX mmx_uavgv8qi3 with SSE
> >   i386: Emulate MMX mmx_uavgv4hi3 with SSE
> >   i386: Emulate MMX mmx_psadbw 

[PATCH, i386]: Add missing TARGET_FPMATH_DEFAULT_P to darwin.h

2019-02-15 Thread Uros Bizjak
Darwin defines its own TARGET_FPMATH_DEFAULT, which should be
accompanied by corresponding TARGET_FPMATH_DEFAULT_P.  Patch adds
missing define.

While looking around, I also fixed various whitespace issues in the header.

BTW: The header file still defines TARGET_64BIT which is horribly out
of date. Someone should introduce correct multilib support to Darwin
to bring it in line with Linux and Solaris, so these defines could be
removed in favour of generic ones in i386.h.

2019-02-15  Uroš Bizjak  

* config/i386/darwin.h (TARGET_FPMATH_DEFAULT_P): New define.

Tested by building a crosscompiler to x86_64-apple-darwin18.

Committed to mainline SVN as obvious.

Uros.
diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h
index a63841ca5554..d8e72ec69a57 100644
--- a/gcc/config/i386/darwin.h
+++ b/gcc/config/i386/darwin.h
@@ -25,10 +25,10 @@ along with GCC; see the file COPYING3.  If not see
 #undef DARWIN_X86
 #define DARWIN_X86 1
 
-#undef  TARGET_64BIT
-#undef TARGET_64BIT_P
+#undef TARGET_64BIT
 #define TARGET_64BIT TARGET_ISA_64BIT
-#defineTARGET_64BIT_P(x) TARGET_ISA_64BIT_P(x)
+#undef TARGET_64BIT_P
+#define TARGET_64BIT_P(x) TARGET_ISA_64BIT_P(x)
 
 #ifdef IN_LIBGCC2
 #undef TARGET_64BIT
@@ -70,14 +70,15 @@ along with GCC; see the file COPYING3.  If not see
 
 #undef TARGET_FPMATH_DEFAULT
 #define TARGET_FPMATH_DEFAULT (TARGET_SSE ? FPMATH_SSE : FPMATH_387)
+#undef TARGET_FPMATH_DEFAULT_P
+#define TARGET_FPMATH_DEFAULT_P(x) \
+  (TARGET_SSE_P(x) ? FPMATH_SSE : FPMATH_387)
 
 #define TARGET_OS_CPP_BUILTINS()\
-  do\
-{   \
-  builtin_define ("__LITTLE_ENDIAN__"); \
-  darwin_cpp_builtins (pfile); \
-}   \
-  while (0)
+  do { \
+builtin_define ("__LITTLE_ENDIAN__");  \
+darwin_cpp_builtins (pfile);   \
+  } while (0)
 
 #undef PTRDIFF_TYPE
 #define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
@@ -121,7 +122,7 @@ extern int darwin_emit_branch_islands;
than 128 bits for Darwin, but it's easier to up the alignment if
it's below the minimum.  */
 #undef PREFERRED_STACK_BOUNDARY
-#define PREFERRED_STACK_BOUNDARY   \
+#define PREFERRED_STACK_BOUNDARY \
   MAX (128, ix86_preferred_stack_boundary)
 
 /* We want -fPIC by default, unless we're using -static to compile for
@@ -179,15 +180,15 @@ extern int darwin_emit_branch_islands;
and returns float values in the 387.  */
 
 #undef TARGET_SUBTARGET_DEFAULT
-#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_128BIT_LONG_DOUBLE)
+#define TARGET_SUBTARGET_DEFAULT \
+  (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_128BIT_LONG_DOUBLE)
 
 /* For darwin we want to target specific processor features as a minimum,
but these unfortunately don't correspond to a specific processor.  */
 #undef TARGET_SUBTARGET32_ISA_DEFAULT
-#define TARGET_SUBTARGET32_ISA_DEFAULT (OPTION_MASK_ISA_MMX\
-   | OPTION_MASK_ISA_SSE   \
-   | OPTION_MASK_ISA_SSE2  \
-   | OPTION_MASK_ISA_SSE3)
+#define TARGET_SUBTARGET32_ISA_DEFAULT \
+  (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE   \
+   | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3)
 
 #undef TARGET_SUBTARGET64_ISA_DEFAULT
 #define TARGET_SUBTARGET64_ISA_DEFAULT TARGET_SUBTARGET32_ISA_DEFAULT
@@ -209,15 +210,16 @@ extern int darwin_emit_branch_islands;
 #define SUBTARGET_ENCODE_SECTION_INFO  darwin_encode_section_info
 
 #undef ASM_OUTPUT_ALIGN
-#define ASM_OUTPUT_ALIGN(FILE,LOG) \
- do { if ((LOG) != 0)  \
-{  \
-  if (in_section == text_section) \
-fprintf (FILE, "\t%s %d,0x90\n", ALIGN_ASM_OP, (LOG)); \
-  else \
-fprintf (FILE, "\t%s %d\n", ALIGN_ASM_OP, (LOG)); \
-}  \
-} while (0)
+#define ASM_OUTPUT_ALIGN(FILE,LOG)\
+  do {\
+if ((LOG) != 0)   \
+  {   \
+   if (in_section == text_section)\
+ fprintf (FILE, "\t%s %d,0x90\n", ALIGN_ASM_OP, (LOG));   \
+   else   \
+ fprintf (FILE, "\t%s %d\n", ALIGN_ASM_OP, (LOG));\
+  }   \
+  } while (0)
 
 /* Darwin x86 assemblers support the .ident directive.  */
 
@@ -227,16 +229,16 @@ extern int darwin_emit_branch_islands;
 /* Darwin profiling -- call mcount.  */
 

Re: [PATCH 00/40] V6: Emulate MMX intrinsics with SSE

2019-02-15 Thread Uros Bizjak
On Fri, Feb 15, 2019 at 2:58 PM H.J. Lu  wrote:
>
> On x86-64, since __m64 is returned and passed in XMM registers, we can
> emulate MMX intrinsics with SSE instructions. To support it, we added
>
>  #define TARGET_MMX_WITH_SSE (TARGET_64BIT && TARGET_SSE2)
>
> ;; Define instruction set of MMX instructions
> (define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx"
>   (const_string "base"))
>
>  (eq_attr "mmx_isa" "native")
>(symbol_ref "!TARGET_MMX_WITH_SSE")
>  (eq_attr "mmx_isa" "x64")
>(symbol_ref "TARGET_MMX_WITH_SSE")
>  (eq_attr "mmx_isa" "x64_avx")
>(symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
>  (eq_attr "mmx_isa" "x64_noavx")
>(symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
>
> We added SSE emulation to MMX patterns and disabled MMX alternatives with
> TARGET_MMX_WITH_SSE.
>
> Most of MMX instructions have equivalent SSE versions and results of some
> SSE versions need to be reshuffled to the right order for MMX.  There are
> a couple of tricky cases:
>
> 1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
> maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
> mask operand and handle unmapped bits 64:127 at memory address by
> adjusting source and mask operands together with memory address.
>
> 2. MMX movntq is emulated with SSE2 DImode movnti, which is available
> in 64-bit mode.
>
> 3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
> SSE emulation must clear the bit 4 in the shuffle control mask.
>
> 4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve
> the upper 64 bits of destination XMM register.
>
> Tests are also added to check each SSE emulation of MMX intrinsics.
>
> There are no regressions on i686 and x86-64.  For x86-64, GCC is also
> tested with
>
> --with-arch=native --with-cpu=native
>
> on AVX2 and AVX512F machines.

I went through the code again, and it looks OK in general, modulo the
mmx_nonimmediate_operand issue and a couple of minor issues.

Please substitute nonimmediate_operand predicate with
mmx_nonimmediate_operand in expanders and insn patterns. Please note
that the proposed convention is to name the operand
register_mmxmem_operand (c.f. register_ssemem_operand), so I suggest
we name the predicate in this way.

There is an issue with a change to emms pattern.

And let's remove _mm_empty () calls from testcases; they complicate
things too much for no apparent benefit.

With those issues fixed, the patchset is OK for gcc-10 when it opens.

Uros.

> H.J. Lu (41):
>   i386: Allow MMX register modes in SSE registers
>   i386: Add mmx_nonimmediate_operand
>   i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
>   i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
>   i386: Emulate MMX plusminus/sat_plusminus with SSE
>   i386: Emulate MMX mulv4hi3 with SSE
>   i386: Emulate MMX smulv4hi3_highpart with SSE
>   i386: Emulate MMX mmx_pmaddwd with SSE
>   i386: Emulate MMX ashr3/3 with SSE
>   i386: Emulate MMX 3 with SSE
>   i386: Emulate MMX mmx_andnot3 with SSE
>   i386: Emulate MMX mmx_eq/mmx_gt3 with SSE
>   i386: Emulate MMX vec_dupv2si with SSE
>   i386: Emulate MMX pshufw with SSE
>   i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
>   i386: Emulate MMX sse_cvtpi2ps with SSE
>   i386: Emulate MMX mmx_pextrw with SSE
>   i386: Emulate MMX mmx_pinsrw with SSE
>   i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
>   i386: Emulate MMX mmx_pmovmskb with SSE
>   i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
>   i386: Emulate MMX maskmovq with SSE2 maskmovdqu
>   i386: Emulate MMX mmx_uavgv8qi3 with SSE
>   i386: Emulate MMX mmx_uavgv4hi3 with SSE
>   i386: Emulate MMX mmx_psadbw with SSE
>   i386: Emulate MMX movntq with SSE2 movntidi
>   i386: Emulate MMX umulv1siv1di3 with SSE2
>   i386: Make _mm_empty () as NOP when MMX is disabled
>   i386: Emulate MMX ssse3_phwv4hi3 with SSE
>   i386: Emulate MMX ssse3_phdv2si3 with SSE
>   i386: Emulate MMX ssse3_pmaddubsw with SSE
>   i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
>   i386: Emulate MMX pshufb with SSE version
>   i386: Emulate MMX ssse3_psign3 with SSE
>   i386: Emulate MMX ssse3_palignrdi with SSE
>   i386: Emulate MMX abs2 with SSE
>   i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
>   i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
>   i386: Allow MMX intrinsic emulation with SSE
>   i386: Enable TM MMX intrinsics with SSE2
>   i386: Add tests for MMX intrinsic emulations with SSE
>
> Uros Bizjak (1):
>   Prevent allocation of MMX registers with TARGET_MMX_WITH_SSE
>
>  gcc/config/i386/constraints.md|   6 +
>  gcc/config/i386/i386-builtin.def  | 126 +--
>  gcc/config/i386/i386-c.c  |   2 +
>  gcc/config/i386/i386-protos.h |   4 +
>  gcc/config/i386/i386.c| 189 +++-
>  gcc/config/i386/i386.h|   2 +
>  

Re: [PATCH 28/42] i386: Make _mm_empty () as NOP when MMX is disabled

2019-02-15 Thread Uros Bizjak
On Fri, Feb 15, 2019 at 3:03 PM H.J. Lu  wrote:
>
> With SSE emulation of MMX intrinsics, we should make _mm_empty () as NOP
> when MMX is disabled.
>
> PR target/89021
> * config/i386/mmx.md (EMMS): Also allow TARGET_MMX_WITH_SSE.
> (mmx_<emms>): Generate "<emms>" only when MMX is enabled.

Better rename the pattern to "*mmx_<emms>" and introduce a new expander:

(define_insn "mmx_"
  [(unspec_volatile [(const_int 0)] EMMS)
   (clobber (reg:XF ST0_REG))
   (clobber (reg:XF ST1_REG))
   (clobber (reg:XF ST2_REG))
   (clobber (reg:XF ST3_REG))
   (clobber (reg:XF ST4_REG))
   (clobber (reg:XF ST5_REG))
   (clobber (reg:XF ST6_REG))
   (clobber (reg:XF ST7_REG))
   (clobber (reg:DI MM0_REG))
   (clobber (reg:DI MM1_REG))
   (clobber (reg:DI MM2_REG))
   (clobber (reg:DI MM3_REG))
   (clobber (reg:DI MM4_REG))
   (clobber (reg:DI MM5_REG))
   (clobber (reg:DI MM6_REG))
   (clobber (reg:DI MM7_REG))]
  "TARGET_MMX || TARGET_MMX_WITH_SSE"
{
  if (!TARGET_MMX)
{
  emit_insn (gen_nop ());
  DONE;
}
})

This way, the compiler won't bother with {f,}emms when there are no
MMX registers.

Uros.

> ---
>  gcc/config/i386/mmx.md | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index d662663a445..eaca71d5750 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -1839,7 +1839,7 @@
> (set_attr "mode" "DI")])
>
>  (define_int_iterator EMMS
> -  [(UNSPECV_EMMS "TARGET_MMX")
> +  [(UNSPECV_EMMS "TARGET_MMX || TARGET_MMX_WITH_SSE")
> (UNSPECV_FEMMS "TARGET_3DNOW")])
>
>  (define_int_attr emms
> @@ -1865,7 +1865,9 @@
> (clobber (reg:DI MM6_REG))
> (clobber (reg:DI MM7_REG))]
>""
> -  ""
> +{
> +  return TARGET_MMX ? "" : "";
>
> +}
>[(set_attr "type" "mmx")
> (set_attr "modrm" "0")
> (set_attr "memory" "none")])
> --
> 2.20.1
>


[Committed][PATCH][GCC][Arm] Remove alternative from neon_softfp_fp16 directive.

2019-02-15 Thread Tamar Christina
Hi All,

There's a bit of a disconnect between the feature flags that don't test the fpu
and ones that do when the test itself also forces an architecture.  The forcing
of the architecture would change the defaults and without explicitly giving the
correct fpu again the test would fail.

I don't see a good way to solve this problem.  Ideally the feature tests
should also contain the extra options the test adds, but for this specific
case it can be solved by always testing the fpu explicitly.

Committed under the GCC obvious rule.

Thanks,
Tamar

gcc/testsuite/ChangeLog:

2019-02-15  Tamar Christina  

* lib/target-supports.exp
(check_effective_target_arm_neon_softfp_fp16_ok_nocache): Drop non-fpu
checking alternative.

-- 
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 1d237d4cd664924cc580cff67a563230b3fe9571..5d8ba4436ac1ad29da57802f2465d05712c8e8e7 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3797,7 +3797,6 @@ proc check_effective_target_arm_neon_softfp_fp16_ok_nocache { } {
 if { [check_effective_target_arm32]
 	 && [check_effective_target_arm_neon_ok] } {
 	foreach flags {"-mfpu=neon-fp16 -mfloat-abi=softfp"
-		   "-mfloat-abi=softfp -mfp16-format=ieee"
 		   "-mfpu=neon-fp16 -mfloat-abi=softfp -mfp16-format=ieee"} {
 	if { [check_no_compiler_messages_nocache arm_neon_softfp_fp16_ok object {
 		#include "arm_neon.h"



Re: [PATCH][DOC] Document new features for GCC 9.

2019-02-15 Thread Eric Gallager
On 2/14/19, David Malcolm  wrote:
> On Thu, 2019-02-14 at 14:19 -0700, Martin Sebor wrote:
>> On 2/13/19 6:48 AM, Martin Liška wrote:
>> > Hi.
>> >
>> > I'm sending patch where I document changes I made during GCC 9
>> > development. I would appreciate both language and factual comments
>> > about the patch.
>>
>> Nothing technical, just a few very minor language nits/suggestions.
>>
>> Martin
>>
>> diff --git a/htdocs/gcc-9/changes.html b/htdocs/gcc-9/changes.html
>> index 13243c2..9fec9e2 100644
>> --- a/htdocs/gcc-9/changes.html
>> +++ b/htdocs/gcc-9/changes.html
>> @@ -50,11 +50,64 @@ a work-in-progress.
>>   General Improvements
>>   
>> 
>> -A new option -flive-patching=[inline-only-static|inline-clone]
>> is
>> +A new option
>> -flive-patching=[inline-only-static|inline-clone] is
>>
>> s/is/has been/ would be better (and either a comma after option or
>> a definite article without the comma).
>>
>>   introduced to provide a safe compilation for live-patching. At
>> the
>> same
>>   time, provides multiple-level control on the enabled IPA
>> optimizations.
>>   See the user guide for further information about the option for
>> more
>> -details.
>> +details.
>
> Ideally we should add URLs any time we mention an option, linking to
> the docs for that option.  texinfo's HTML toolchain does give us per-
> option anchors.  They're not visible [1], but "View Source" shows us
> that they do exist; in the form:
>
> https://gcc.gnu.org/onlinedocs/gcc/SOMETHING.html#indexOPTION
>
> though annoyingly the SOMETHING varies depending on what kind of option
> it is.
>
> The pertinent one here is:
> https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#index-flive-patching
>
> (FWIW, I have a patch for GCC 10 that emits terminal sequences to
> "linkify" the output when diagnostics mention option names, adding a
> URL to the docs for the pertinent option).
>
> [...snip...]
>
> Dave
>
> [1] I've emailed the texinfo project about this
>

The link for that thread is here, for reference:
https://lists.gnu.org/archive/html/help-texinfo/2019-02/msg0.html


Re: [PATCH][GCC][DOC] Remove obsolete arm and aarch64 CPU names from invoke.texi

2019-02-15 Thread Sam Tebbs
On 19/01/2019 23:37, Gerald Pfeifer wrote:

> On Thu, 10 Jan 2019, Sam Tebbs wrote:
>>> I believe this should also be covered in the GCC 9 release notes
>>> at https://gcc.gnu.org/gcc-9/changes.html ?
>> Sorry for the late reply. My email filters seem to have stumbled a bit
>> so I didn't pick this up until now. Would you suggest adding something
>> along the lines of "Removed obsolete Arm CPU names from the option
>> documentation" (perhaps with a full list as in my original email)?
> Yes, please.
>
> Gerald (now needing to look at his filters)

Hi Gerald,

I was looking into this and it seems that the CPU and architecture 
removals have already been documented in the Arm-specific section of the 
GCC 9 changes, so explicitly mentioning that the documentation has been 
removed as well is probably unnecessary.

Sam



Re: [PATCH 02/42] i386: Add mmx_nonimmediate_operand

2019-02-15 Thread Uros Bizjak
On Fri, Feb 15, 2019 at 2:58 PM H.J. Lu  wrote:
>
> True if the operand is a register or an nonimmediate operand when
> TARGET_MMX_WITH_SSE is false.
>
> PR target/89021
> * config/i386/predicates.md (mmx_nonimmediate_operand): New.
> ---
>  gcc/config/i386/predicates.md | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 99226e86436..bd1f07a28fb 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -49,6 +49,13 @@
>(and (match_code "reg")
> (match_test "MMX_REGNO_P (REGNO (op))")))
>
> +;; True if the operand is a register or an nonimmediate operand when
> +;; TARGET_MMX_WITH_SSE is false.
> +(define_predicate "mmx_nonimmediate_operand"
> +  (ior (match_operand 0 "register_operand")
> +   (and (not (match_test "TARGET_MMX_WITH_SSE"))
> +   (match_operand 0 "nonimmediate_operand"

Here you can use "memory_operand".

I'd expect you use this new predicate universally throughout the
patchset in e.g.

+  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (, mode, operands)"
+  "@
+   ...
+   ...
+   v...
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")

When TARGET_MMX_WITH_SSE is true, then only the last two constraints
are enabled, so we are sure that only register operand is allowed.
While RA can fix up mem->reg by itself, it is beneficial to pass this
information to the compiler via predicate, and
mmx_nonimmediate_operand fits there perfectly.

Uros.


GCC 8.3 Status Report (2019-02-15)

2019-02-15 Thread Jakub Jelinek
Status
======

The GCC 8 branch is now frozen for blocking regressions and documentation
fixes only, all changes to the branch require a RM approval now.


Quality Data
============

Priority          #   Change from last report
--------        ---   -----------------------
P1                0
P2              193   -  11
P3               29   +   4
P4              163   -   2
P5               24
--------        ---   -----------------------
Total P1-P3     222   -   7
Total           409   -   9


Previous Report
===============

https://gcc.gnu.org/ml/gcc/2019-02/msg00034.html


Re: [PATCH 17/42] i386: Emulate MMX mmx_pextrw with SSE

2019-02-15 Thread H.J. Lu
On Fri, Feb 15, 2019 at 6:03 AM H.J. Lu  wrote:
>
> Emulate MMX mmx_pextrw with SSE.  Only SSE register source operand is
> allowed.
>
> PR target/89021
> * config/i386/mmx.md (mmx_pextrw): Add SSE emulation.
> ---
>  gcc/config/i386/mmx.md | 16 +---
>  1 file changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 3ea64e9aabe..678eaa713dc 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -1310,16 +1310,18 @@
> (set_attr "mode" "DI")])
>
>  (define_insn "mmx_pextrw"
> -  [(set (match_operand:SI 0 "register_operand" "=r")
> +  [(set (match_operand:SI 0 "register_operand" "=r,r")
>  (zero_extend:SI
>   (vec_select:HI
> -   (match_operand:V4HI 1 "register_operand" "y")
> -   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]]
> -  "TARGET_SSE || TARGET_3DNOW_A"
> -  "pextrw\t{%2, %1, %0|%0, %1, %2}"
> -  [(set_attr "type" "mmxcvt")
> +   (match_operand:V4HI 1 "register_operand" "y,Yv")
> +   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]]
> +  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> +   && (TARGET_SSE || TARGET_3DNOW_A)"
> +  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "mmx_isa" "native,x64")
> +   (set_attr "type" "mmxcvt,sselog1")
> (set_attr "length_immediate" "1")
> -   (set_attr "mode" "DI")])
> +   (set_attr "mode" "DI,TI")])
>
>  (define_expand "mmx_pshufw"
>[(match_operand:V4HI 0 "register_operand")
> --
> 2.20.1
>

Here is the updated patch for mmx_pextrw.  It should be

(define_insn "mmx_pextrw"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
(zero_extend:SI
  (vec_select:HI
(match_operand:V4HI 1 "register_operand" "y,Yv")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
   && (TARGET_SSE || TARGET_3DNOW_A)"
  "@
   pextrw\t{%2, %1, %0|%0, %1, %2}
   %vpextrw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "mmx_isa" "native,x64")
   (set_attr "type" "mmxcvt,sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "DI,TI")])


-- 
H.J.
From 17bd9eb652aff70a72680f444fbb169344cf563b Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Fri, 25 Jan 2019 11:27:35 -0800
Subject: [PATCH 17/42] i386: Emulate MMX mmx_pextrw with SSE

Emulate MMX mmx_pextrw with SSE.  Only SSE register source operand is
allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_pextrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 3ea64e9aabe..1818957f670 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1310,16 +1310,20 @@
(set_attr "mode" "DI")])
 
 (define_insn "mmx_pextrw"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
 (zero_extend:SI
 	  (vec_select:HI
-	(match_operand:V4HI 1 "register_operand" "y")
-	(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "mmxcvt")
+	(match_operand:V4HI 1 "register_operand" "y,Yv")
+	(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   pextrw\t{%2, %1, %0|%0, %1, %2}
+   %vpextrw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog1")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_pshufw"
   [(match_operand:V4HI 0 "register_operand")
-- 
2.20.1



Bugs in extended C interop

2019-02-15 Thread Bader, Reinhold
Dear Paul,

I've started putting together my observations on the current status of the 
F2018 C interop extensions  in gfortran 9.0. See the PRs
89363, 89364, 89365, 89366:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89363
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89364
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89365
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89366

Regards
Reinhold


smime.p7s
Description: S/MIME cryptographic signature


libgo patch committed: Add S/390 support to internal/cpu package

2019-02-15 Thread Ian Lance Taylor
This patch by Robin Dapp adds S/390 support to the internal/cpu
package.  This partially addresses PR 89123.  I bootstrapped it on
x86_64-pc-linux-gnu, which means little.  Committed to mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 268940)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-6877c95a5f44c3ab4f492d2000ce07771341d7b7
+0563f2d018cdb2cd685c254bac5ceb38396d0a27
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/go/internal/cpu/cpu_gccgo.c
===
--- libgo/go/internal/cpu/cpu_gccgo.c   (revision 268369)
+++ libgo/go/internal/cpu/cpu_gccgo.c   (working copy)
@@ -70,3 +70,118 @@ struct xgetbv_ret xgetbv(void) {
 #pragma GCC pop_options
 
 #endif /* defined(__i386__) || defined(__x86_64__)  */
+
+#ifdef __s390__
+
+struct facilityList {
+   uint64_t bits[4];
+};
+
+struct queryResult {
+   uint64_t bits[2];
+};
+
+struct facilityList stfle(void)
+  __asm__(GOSYM_PREFIX "internal..z2fcpu.stfle")
+  __attribute__((no_split_stack));
+
+struct facilityList stfle(void) {
+struct facilityList ret;
+__asm__ ("la%%r1, %[ret]\t\n"
+"lghi  %%r0, 3\t\n" // last doubleword index to store
+"xc0(32,%%r1), 0(%%r1)\t\n" // clear 4 doublewords (32 bytes)
+".long 0xb2b01000\t\n"  // store facility list extended (STFLE)
+:[ret] "=Q" (ret) : : "r0", "r1", "cc");
+return ret;
+}
+
+struct queryResult kmQuery(void)
+  __asm__(GOSYM_PREFIX "internal..z2fcpu.kmQuery")
+  __attribute__((no_split_stack));
+
+struct queryResult kmQuery() {
+struct queryResult ret;
+
+__asm__ ("lghi   %%r0, 0\t\n" // set function code to 0 (KM-Query)
+"la %%r1, %[ret]\t\n"
+".long  0xb92e0024\t\n" // cipher message (KM)
+:[ret] "=Q" (ret) : : "r0", "r1", "cc");
+return ret;
+}
+
+struct queryResult kmcQuery(void)
+  __asm__(GOSYM_PREFIX "internal..z2fcpu.kmcQuery")
+  __attribute__((no_split_stack));
+
+struct queryResult kmcQuery() {
+struct queryResult ret;
+
+__asm__ ("lghi   %%r0, 0\t\n" // set function code to 0 (KMC-Query)
+"la %%r1, %[ret]\t\n"
+".long  0xb92f0024\t\n"  // cipher message with chaining (KMC)
+:[ret] "=Q" (ret) : : "r0", "r1", "cc");
+
+return ret;
+}
+
+struct queryResult kmctrQuery(void)
+  __asm__(GOSYM_PREFIX "internal..z2fcpu.kmctrQuery")
+  __attribute__((no_split_stack));
+
+struct queryResult kmctrQuery() {
+struct queryResult ret;
+
+__asm__ ("lghi   %%r0, 0\t\n" // set function code to 0 (KMCTR-Query)
+"la %%r1, %[ret]\t\n"
+".long  0xb92d4024\t\n" // cipher message with counter (KMCTR)
+:[ret] "=Q" (ret) : : "r0", "r1", "cc");
+
+return ret;
+}
+
+struct queryResult kmaQuery(void)
+  __asm__(GOSYM_PREFIX "internal..z2fcpu.kmaQuery")
+  __attribute__((no_split_stack));
+
+struct queryResult kmaQuery() {
+struct queryResult ret;
+
+__asm__ ("lghi   %%r0, 0\t\n" // set function code to 0 (KMA-Query)
+"la %%r1, %[ret]\t\n"
+".long  0xb9296024\t\n" // cipher message with authentication (KMA)
+:[ret] "=Q" (ret) : : "r0", "r1", "cc");
+
+return ret;
+}
+
+struct queryResult kimdQuery(void)
+  __asm__(GOSYM_PREFIX "internal..z2fcpu.kimdQuery")
+  __attribute__((no_split_stack));
+
+struct queryResult kimdQuery() {
+struct queryResult ret;
+
+__asm__ ("lghi   %%r0, 0\t\n"  // set function code to 0 (KIMD-Query)
+"la %%r1, %[ret]\t\n"
+".long  0xb93e0024\t\n"  // compute intermediate message digest 
(KIMD)
+:[ret] "=Q" (ret) : : "r0", "r1", "cc");
+
+return ret;
+}
+
+struct queryResult klmdQuery(void)
+  __asm__(GOSYM_PREFIX "internal..z2fcpu.klmdQuery")
+  __attribute__((no_split_stack));
+
+struct queryResult klmdQuery() {
+struct queryResult ret;
+
+__asm__ ("lghi   %%r0, 0\t\n"  // set function code to 0 (KLMD-Query)
+"la %%r1, %[ret]\t\n"
+".long  0xb93f0024\t\n"  // compute last message digest (KLMD)
+:[ret] "=Q" (ret) : : "r0", "r1", "cc");
+
+return ret;
+}
+
+#endif /* defined(__s390__)  */
Index: libgo/go/internal/cpu/cpu_s390x.go
===
--- libgo/go/internal/cpu/cpu_s390x.go  (revision 268369)
+++ libgo/go/internal/cpu/cpu_s390x.go  (working copy)
@@ -98,13 +98,13 @@ func (s *facilityList) Has(fs ...facilit
 
 // The following feature detection functions are defined in cpu_s390x.s.
 // They are likely to be expensive to call so the results should be cached.
-func stfle() facilityList { panic("not implemented for gccgo") }
-func kmQuery() queryResult{ panic("not implemented for gccgo") }
-func kmcQuery() 

Re: Go patch committed: Harmonize types referenced by both C and Go

2019-02-15 Thread Ian Lance Taylor
On Fri, Feb 15, 2019 at 4:03 AM Rainer Orth  
wrote:
>
> Andreas Schwab  writes:
>
> > This breaks non-split-stack builds.
> >
> > ../../../libgo/runtime/stack.c: In function 'doscanstack1':
> > ../../../libgo/runtime/stack.c:113:18: error: passing argument 1 of
> > 'scanstackblock' makes integer from pointer without a cast
> > [-Werror=int-conversion]
> >   113 |   scanstackblock(bottom, (uintptr)(top - bottom), gcw);
> >   |  ^~
> >   |  |
> >   |  byte * {aka unsigned char *}
>
> I see the same on Solaris.  Even with that fixed by appropriate casts to
> uintptr (plus a few more times), Solaris bootstrap is still broken by
> that patch:
>
> /vol/gcc/src/hg/trunk/local/libgo/runtime/go-varargs.c: In function 
> '__go_syscall6':
> /vol/gcc/src/hg/trunk/local/libgo/runtime/go-varargs.c:101:10: error: 
> implicit declaration of function 'syscall' 
> [-Werror=implicit-function-declaration]
>   101 |   return syscall (flag, a1, a2, a3, a4, a5, a6);
>   |  ^~~
>
> This needs to include  for the syscall declaration, apart
> from the fundamental problem that syscall isn't a stable interface on
> Solaris.

I committed this patch which should fix the Solaris build.

The code was already calling syscall, it was just doing it in a way
that the types didn't necessarily match the C declaration.  This is
the implementation of Go's syscall.Syscall function, so there isn't
really anything else we can do.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 268939)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-a9c1a76e14b66a356d3c3dfb50f1e6138e97733c
+6877c95a5f44c3ab4f492d2000ce07771341d7b7
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/runtime/go-varargs.c
===
--- libgo/runtime/go-varargs.c  (revision 268923)
+++ libgo/runtime/go-varargs.c  (working copy)
@@ -12,6 +12,12 @@
 #include 
 #include 
 #include 
+#ifdef HAVE_SYSCALL_H
+#include 
+#endif
+#ifdef HAVE_SYS_SYSCALL_H
+#include 
+#endif
 
 /* The syscall package calls C functions.  The Go compiler can not
represent a C varargs functions.  On some systems it's important


[PR 89330] Avoid adding dead speculative edges to inlining heap

2019-02-15 Thread Martin Jambor
Hi,

Martin discovered that inliner was adding deleted call graph edges to
its heap when supposedly processing newly discovered direct edges.  The
problem is that a new edge created in the speculation part of the
indirect inlining machinery created speculative edges that were
immediately afterwards removed by check_speculations() after it figured
out the edge is not speculation_useful_p().

The fix below avoids creating such non-speculation_useful_p edges in the
first place.  The edge is not useful because it cannot be inlined
because the callee calls comdat local functions.  I had to split
can_inline_edge_p into two functions to allow performing the caller and
callee checks before actually creating an edge.

I think this is safe and beneficial to commit now, maybe with the
exception of the newly added assert in add_new_edges_to_heap, since
inlining apparently can cope with such nonsensical edges in the heap.
But in that case I'd add the assert in the next stage1.

Bootstrapped and tested on x86_64-linux.  IIUC, Martin even
LTO-bootstrapped it.  OK for trunk?

Thanks,

Martin



2019-02-15  Martin Jambor  

PR ipa/89330
* ipa-inline.c (can_inline_edge_p): Move most of the checks...
(call_not_inlinable_p): ...this new function.
(add_new_edges_to_heap): Assert a caller is known.
* ipa-inline.h (call_not_inlinable_p): Declare.
* ipa-prop.c: Include ipa-inline.h
(try_make_edge_direct_virtual_call): Create speculative edges only
if there is any chance of inlining them.

testsuite/
* g++.dg/lto/pr89330_[01].C: New test.
---
 gcc/ipa-inline.c | 128 ---
 gcc/ipa-inline.h |   4 +-
 gcc/ipa-prop.c   |   8 +-
 gcc/testsuite/g++.dg/lto/pr89330_0.C |  50 +++
 gcc/testsuite/g++.dg/lto/pr89330_1.C |  36 
 5 files changed, 154 insertions(+), 72 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/lto/pr89330_0.C
 create mode 100644 gcc/testsuite/g++.dg/lto/pr89330_1.C

diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index 360c3de3289..ae330943571 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -299,12 +299,60 @@ sanitize_attrs_match_for_inline_p (const_tree caller, 
const_tree callee)
   (opts_for_fn (caller->decl)->x_##flag\
!= opts_for_fn (callee->decl)->x_##flag)
 
+/* Return CIF_OK if a call from CALLER to CALLEE is or would be inlineable.
+   Otherwise, return the reason why it cannot.  EARLY should be set when
+   deciding about early inlining.  */
+
+enum cgraph_inline_failed_t
+call_not_inlinable_p (cgraph_node *caller, cgraph_node *callee,
+ bool early)
+{
+  enum availability avail;
+  caller = caller->global.inlined_to ? caller->global.inlined_to : caller;
+  callee = callee->ultimate_alias_target (, caller);
+
+  if (!callee->definition)
+return CIF_BODY_NOT_AVAILABLE;
+  if (!early && (!opt_for_fn (callee->decl, optimize)
+|| !opt_for_fn (caller->decl, optimize)))
+return CIF_FUNCTION_NOT_OPTIMIZED;
+  else if (callee->calls_comdat_local)
+return CIF_USES_COMDAT_LOCAL;
+  else if (avail <= AVAIL_INTERPOSABLE)
+return CIF_OVERWRITABLE;
+  /* Don't inline if the functions have different EH personalities.  */
+  else if (DECL_FUNCTION_PERSONALITY (caller->decl)
+  && DECL_FUNCTION_PERSONALITY (callee->decl)
+  && (DECL_FUNCTION_PERSONALITY (caller->decl)
+  != DECL_FUNCTION_PERSONALITY (callee->decl)))
+return CIF_EH_PERSONALITY;
+  /* TM pure functions should not be inlined into non-TM_pure
+ functions.  */
+  else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
+return CIF_UNSPECIFIED;
+  /* Check compatibility of target optimization options.  */
+  else if (!targetm.target_option.can_inline_p (caller->decl,
+   callee->decl))
+return CIF_TARGET_OPTION_MISMATCH;
+  else if (ipa_fn_summaries->get (callee) == NULL
+  || !ipa_fn_summaries->get (callee)->inlinable)
+return CIF_FUNCTION_NOT_INLINABLE;
+  /* Don't inline a function with mismatched sanitization attributes. */
+  else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
+return CIF_ATTRIBUTE_MISMATCH;
+  else if (callee->externally_visible
+  && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
+return CIF_EXTERN_LIVE_ONLY_STATIC;
+  return CIF_OK;
+}
+
 /* Decide if we can inline the edge and possibly update
inline_failed reason.  
We check whether inlining is possible at all and whether
caller growth limits allow doing so.  
 
-   if REPORT is true, output reason to the dump file. */
+   If REPORT is true, output reason to the dump file.  EARLY should be set when
+   deciding about early inlining.  */
 
 static bool
 can_inline_edge_p (struct cgraph_edge *e, bool report,
@@ -319,81 +367,22 @@ can_inline_edge_p (struct cgraph_edge 

[PATCH 29/42] i386: Emulate MMX ssse3_phwv4hi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_phwv4hi3 with SSE by moving bits
64:95 to bits 32:63 in SSE register.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_phwv4hi3):
Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f37658630dd..1c31a1fbad0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15232,13 +15232,13 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
 
-(define_insn "ssse3_phwv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_phwv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_concat:V4HI
  (vec_concat:V2HI
(ssse3_plusminus:HI
  (vec_select:HI
-   (match_operand:V4HI 1 "register_operand" "0")
+   (match_operand:V4HI 1 "register_operand" "0,0,Yv")
(parallel [(const_int 0)]))
  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
@@ -15247,19 +15247,37 @@
  (vec_concat:V2HI
(ssse3_plusminus:HI
  (vec_select:HI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+   (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")
(parallel [(const_int 0)]))
  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))]
-  "TARGET_SSSE3"
-  "phw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   phw\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = lowpart_subreg (V8HImode, operands[0],
+   GET_MODE (operands[0]));
+  rtx op1 = lowpart_subreg (V8HImode, operands[1],
+   GET_MODE (operands[1]));
+  rtx op2 = lowpart_subreg (V8HImode, operands[2],
+   GET_MODE (operands[2]));
+  emit_insn (gen_ssse3_phwv8hi3 (op0, op1, op2));
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_phdv8si3"
   [(set (match_operand:V8SI 0 "register_operand" "=x")
-- 
2.20.1



[PATCH 30/42] i386: Emulate MMX ssse3_phdv2si3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_phdv2si3 with SSE by moving bits
64:95 to bits 32:63 in SSE register.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_phdv2si3):
Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 34 ++
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1c31a1fbad0..cb4a1c9fc59 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15356,26 +15356,44 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
 
-(define_insn "ssse3_phdv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_phdv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_concat:V2SI
  (plusminus:SI
(vec_select:SI
- (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 1 "register_operand" "0,0,Yv")
  (parallel [(const_int 0)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
  (plusminus:SI
(vec_select:SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym,x,Yv")
  (parallel [(const_int 0)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 1)])]
-  "TARGET_SSSE3"
-  "phd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   phd\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = lowpart_subreg (V4SImode, operands[0],
+   GET_MODE (operands[0]));
+  rtx op1 = lowpart_subreg (V4SImode, operands[1],
+   GET_MODE (operands[1]));
+  rtx op2 = lowpart_subreg (V4SImode, operands[2],
+   GET_MODE (operands[2]));
+  emit_insn (gen_ssse3_phdv4si3 (op0, op1, op2));
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_pmaddubsw256"
   [(set (match_operand:V16HI 0 "register_operand" "=x,v")
-- 
2.20.1



[PATCH 10/42] i386: Emulate MMX <code><mode>3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX <code><mode>3 with SSE.  Only an SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (any_logic:<code><mode>3): New.
(any_logic:*mmx_<code><mode>3): Also allow TARGET_MMX_WITH_SSE.
Add SSE support.
---
 gcc/config/i386/mmx.md | 27 ---
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index eef17504616..7a253005aba 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1066,15 +1066,28 @@
   "TARGET_MMX"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
+(define_expand "3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+   (any_logic:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
 (any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0,0,Yv")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (, mode, operands)"
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 20/42] i386: Emulate MMX mmx_pmovmskb with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_pmovmskb with SSE by zero-extending result of SSE pmovmskb
from QImode to SImode.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_pmovmskb): Changed to
define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/mmx.md | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 8833c9f091b..1adb50aa4b1 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1760,14 +1760,30 @@
   [(set_attr "type" "mmxshft")
(set_attr "mode" "DI")])
 
-(define_insn "mmx_pmovmskb"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-   (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+(define_insn_and_split "mmx_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+   (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,x")]
   UNSPEC_MOVMSK))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmovmskb\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   pmovmskb\t{%1, %0|%0, %1}
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 0)
+(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
+   (set (match_dup 0)
+   (zero_extend:SI (match_dup 2)))]
+{
+  /* Generate SSE pmovmskb and zero-extend from QImode to SImode.  */
+  operands[1] = lowpart_subreg (V16QImode, operands[1],
+   GET_MODE (operands[1]));
+  operands[2] = lowpart_subreg (QImode, operands[0],
+   GET_MODE (operands[0]));
+}
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,ssemov")
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_maskmovq"
   [(set (match_operand:V8QI 0 "memory_operand")
-- 
2.20.1



[PATCH 38/42] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE

2019-02-15 Thread H.J. Lu
PR target/89021
* config/i386/mmx.md (MMXMODE:mov<mode>): Also allow
TARGET_MMX_WITH_SSE.
(MMXMODE:*mov<mode>_internal): Likewise.
(MMXMODE:movmisalign<mode>): Likewise.
---
 gcc/config/i386/mmx.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index eaca71d5750..c5c0c449aab 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -70,7 +70,7 @@
 (define_expand "mov"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (mode, operands);
   DONE;
@@ -81,7 +81,7 @@
 "=r ,o ,r,r ,m ,?!y,!y,?!y,m  ,r  ,?!y,v,v,v,m,r,v,!y,*x")
(match_operand:MMXMODE 1 "nonimm_or_0_operand"
 "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!y,r  ,C,v,m,v,v,r,*x,!y"))]
-  "TARGET_MMX
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -207,7 +207,7 @@
 (define_expand "movmisalign"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (mode, operands);
   DONE;
-- 
2.20.1



[PATCH 33/42] i386: Emulate MMX pshufb with SSE version

2019-02-15 Thread H.J. Lu
Emulate MMX version of pshufb with SSE version by masking out the bit 3
of the shuffle control byte.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/sse.md (ssse3_pshufbv8qi3): Changed to
define_insn_and_split.  Also allow TARGET_MMX_WITH_SSE.  Add
SSE emulation.
---
 gcc/config/i386/sse.md | 46 +-
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 2b91f8f5839..6fa9f383cd3 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15697,17 +15697,45 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "")])
 
-(define_insn "ssse3_pshufbv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-   (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
-UNSPEC_PSHUFB))]
-  "TARGET_SSSE3"
-  "pshufb\t{%2, %0|%0, %2}";
-  [(set_attr "type" "sselog1")
+(define_insn_and_split "ssse3_pshufbv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+   (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "mmx_nonimmediate_operand" 
"ym,x,Yv")]
+UNSPEC_PSHUFB))
+   (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pshufb\t{%2, %0|%0, %2}
+   #
+   #"
+  "TARGET_MMX_WITH_SSE && reload_completed"
+  [(set (match_dup 3) (match_dup 5))
+   (set (match_dup 3)
+   (and:V4SI (match_dup 3) (match_dup 2)))
+   (set (match_dup 0)
+   (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
+{
+  /* Emulate MMX version of pshufb with SSE version by masking out the
+ bit 3 of the shuffle control byte.  */
+  operands[0] = lowpart_subreg (V16QImode, operands[0],
+   GET_MODE (operands[0]));
+  operands[1] = lowpart_subreg (V16QImode, operands[1],
+   GET_MODE (operands[1]));
+  operands[2] = lowpart_subreg (V4SImode, operands[2],
+   GET_MODE (operands[2]));
+  operands[4] = lowpart_subreg (V16QImode, operands[3],
+   GET_MODE (operands[3]));
+  rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7));
+  rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
+  operands[5] = force_const_mem (V4SImode, vec_const);
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_psign3"
   [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
-- 
2.20.1



[PATCH 28/42] i386: Make _mm_empty () as NOP when MMX is disabled

2019-02-15 Thread H.J. Lu
With SSE emulation of MMX intrinsics, we should make _mm_empty () a NOP
when MMX is disabled.

PR target/89021
* config/i386/mmx.md (EMMS): Also allow TARGET_MMX_WITH_SSE.
(mmx_<emms>): Generate "<emms>" only when MMX is enabled.
---
 gcc/config/i386/mmx.md | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index d662663a445..eaca71d5750 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1839,7 +1839,7 @@
(set_attr "mode" "DI")])
 
 (define_int_iterator EMMS
-  [(UNSPECV_EMMS "TARGET_MMX")
+  [(UNSPECV_EMMS "TARGET_MMX || TARGET_MMX_WITH_SSE")
(UNSPECV_FEMMS "TARGET_3DNOW")])
 
 (define_int_attr emms
@@ -1865,7 +1865,9 @@
(clobber (reg:DI MM6_REG))
(clobber (reg:DI MM7_REG))]
   ""
-  ""
+{
+  return TARGET_MMX ? "" : "";
+}
   [(set_attr "type" "mmx")
(set_attr "modrm" "0")
(set_attr "memory" "none")])
-- 
2.20.1



[PATCH 24/42] i386: Emulate MMX mmx_uavgv4hi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_uavgv4hi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_uavgv4hi3): Also check TARGET_MMX and
TARGET_MMX_WITH_SSE.
(*mmx_uavgv4hi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 0bd87ba79e8..456d1a51c50 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1739,27 +1739,33 @@
  (const_vector:V4SI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
 
 (define_insn "*mmx_uavgv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(plus:V4SI
  (plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")))
  (const_vector:V4SI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V4HImode, operands)"
-  "pavgw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "@
+   pavgw\t{%2, %0|%0, %2}
+   pavgw\t{%2, %0|%0, %2}
+   vpavgw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
   [(set (match_operand:V1DI 0 "register_operand" "=y")
-- 
2.20.1



[PATCH 40/42] i386: Allow MMX intrinsic emulation with SSE

2019-02-15 Thread H.J. Lu
Allow MMX intrinsic emulation with SSE/SSE2/SSSE3.  Don't enable MMX ISA
by default with TARGET_MMX_WITH_SSE.

For pr82483-1.c and pr82483-2.c, "-mssse3 -mno-mmx" compiles in 64-bit
mode since MMX intrinsics can be emulated with SSE.

gcc/

PR target/89021
* config/i386/i386-builtin.def: Enable MMX intrinsics with
SSE/SSE2/SSSE3.
* config/i386/i386.c (ix86_init_mmx_sse_builtins): Likewise.
(ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
intrinsics with TARGET_MMX_WITH_SSE.
* config/i386/mmintrin.h: Only require SSE2 if __MMX_WITH_SSE__
is defined.

gcc/testsuite/

PR target/89021
* gcc.target/i386/pr82483-1.c: Error only on ia32.
* gcc.target/i386/pr82483-2.c: Likewise.
---
 gcc/config/i386/i386-builtin.def  | 126 +++---
 gcc/config/i386/i386.c|  29 -
 gcc/config/i386/mmintrin.h|  12 ++-
 gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
 5 files changed, 101 insertions(+), 70 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 88005f4687f..10a9d631f29 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", 
IX86_BUILTIN_FNSTSW, UNKN
 BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, 
UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", 
IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, 
"__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* 3DNow! */
 BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", 
IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
@@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", 
IX86_BUILTIN_RORQI, UNKNO
 BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, 
UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", 
IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", 
IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", 
IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", 
IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", 
IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", 
IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, 
"__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, 
"__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, 
"__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, 
"__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, 
"__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, 
"__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, 
"__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, 
"__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", 
IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, 
"__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", 
IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, 
"__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", 
IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", 
IX86_BUILTIN_PXOR, UNKNOWN, (int) 

[PATCH 25/42] i386: Emulate MMX mmx_psadbw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_psadbw with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_psadbw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 456d1a51c50..8ba8ca6ea45 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1768,14 +1768,19 @@
(set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
-(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+  [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
+(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,x,Yv")]
 UNSPEC_PSADBW))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "psadbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "@
+   psadbw\t{%2, %0|%0, %2}
+   psadbw\t{%2, %0|%0, %2}
+   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn_and_split "mmx_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-- 
2.20.1



[PATCH 27/42] i386: Emulate MMX umulv1siv1di3 with SSE2

2019-02-15 Thread H.J. Lu
Emulate MMX umulv1siv1di3 with SSE2.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/mmx.md (sse2_umulv1siv1di3): Add SSE emulation
support.
(*sse2_umulv1siv1di3): Add SSE2 emulation.
---
 gcc/config/i386/mmx.md | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 427a037fa62..d662663a445 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -911,24 +911,30 @@
(vec_select:V1SI
  (match_operand:V2SI 2 "nonimmediate_operand")
  (parallel [(const_int 0)])]
-  "TARGET_SSE2"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
 
 (define_insn "*sse2_umulv1siv1di3"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
+  [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv")
 (mult:V1DI
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SI 1 "nonimmediate_operand" "%0,0,Yv")
  (parallel [(const_int 0)])))
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym,x,Yv")
  (parallel [(const_int 0)])]
-  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
-  "pmuludq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && TARGET_SSE2
+   && ix86_binary_operator_ok (MULT, V2SImode, operands)"
+  "@
+   pmuludq\t{%2, %0|%0, %2}
+   pmuludq\t{%2, %0|%0, %2}
+   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_v4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 23/42] i386: Emulate MMX mmx_uavgv8qi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_uavgv8qi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_uavgv8qi3): Also check TARGET_MMX
and TARGET_MMX_WITH_SSE.
(*mmx_uavgv8qi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 940f022464d..0bd87ba79e8 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1684,42 +1684,47 @@
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "TARGET_SSE || TARGET_3DNOW"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
 
 (define_insn "*mmx_uavgv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(truncate:V8QI
  (lshiftrt:V8HI
(plus:V8HI
  (plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V8QI 1 "nonimmediate_operand" "%0,0,Yv"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,x,Yv")))
  (const_vector:V8HI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
-  "(TARGET_SSE || TARGET_3DNOW)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V8QImode, operands)"
 {
   /* These two instructions have the same operation, but their encoding
  is different.  Prefer the one that is de facto standard.  */
-  if (TARGET_SSE || TARGET_3DNOW_A)
+  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+return "vpavgb\t{%2, %1, %0|%0, %1, %2}";
+  else if (TARGET_SSE || TARGET_3DNOW_A)
 return "pavgb\t{%2, %0|%0, %2}";
   else
 return "pavgusb\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "mmxshft")
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
(set (attr "prefix_extra")
  (if_then_else
(not (ior (match_test "TARGET_SSE")
 (match_test "TARGET_3DNOW_A")))
(const_string "1")
(const_string "*")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_uavgv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 41/42] i386: Enable TM MMX intrinsics with SSE2

2019-02-15 Thread H.J. Lu
This patch enables TM MMX intrinsics with SSE2 when MMX is disabled.

PR target/89021
* config/i386/i386.c (bdesc_tm): Enable MMX intrinsics with
SSE2.
---
 gcc/config/i386/i386.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 073a2534d1f..319a98f824a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -31065,13 +31065,13 @@ static const struct builtin_description 
bdesc_##kind[] =  \
we're lazy.  Add casts to make them fit.  */
 static const struct builtin_description bdesc_tm[] =
 {
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, 
VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, 
UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, 
UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, 
V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, 
UNKNOWN, V2SI_FTYPE_PCV2SI },
 
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum 
ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum 
ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
@@ -31089,7 +31089,7 @@ static const struct builtin_description bdesc_tm[] =
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum 
ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
 
-  { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
+  { OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, 
"__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, 
VOID_FTYPE_PCVOID },
   { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
   { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum 
ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
 };
-- 
2.20.1



[PATCH 36/42] i386: Emulate MMX abs<mode>2 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX abs<mode>2 with SSE.  Only an SSE register source operand is
allowed.

PR target/89021
* config/i386/sse.md (abs<mode>2): Add SSE emulation.
---
 gcc/config/i386/sse.md | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e17f395688b..0174778833a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15942,16 +15942,19 @@
 })
 
 (define_insn "abs2"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
(abs:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
-  "TARGET_SSSE3"
-  "pabs\t{%1, %0|%0, %1}";
-  [(set_attr "type" "sselog1")
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+  "@
+   pabs\t{%1, %0|%0, %1}
+   %vpabs\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "sselog1")
(set_attr "prefix_rep" "0")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 32/42] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX ssse3_pmulhrswv4hi3 with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/sse.md (*ssse3_pmulhrswv4hi3): Add SSE emulation.
---
 gcc/config/i386/sse.md | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f2dbb51c7fd..2b91f8f5839 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15652,25 +15652,31 @@
(set_attr "mode" "")])
 
 (define_insn "*ssse3_pmulhrswv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(plus:V4SI
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")))
(const_int 14))
  (match_operand:V4HI 3 "const1_operand"))
(const_int 1]
-  "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "pmulhrsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && TARGET_SSSE3
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   pmulhrsw\t{%2, %0|%0, %2}
+   pmulhrsw\t{%2, %0|%0, %2}
+   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_pshufb3"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
-- 
2.20.1



[PATCH 21/42] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_umulv4hi3_highpart with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_umulv4hi3_highpart): Also check
TARGET_MMX and TARGET_MMX_WITH_SSE.
(*mmx_umulv4hi3_highpart): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 1adb50aa4b1..940f022464d 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -785,24 +785,30 @@
  (zero_extend:V4SI
(match_operand:V4HI 2 "nonimmediate_operand")))
(const_int 16))))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_umulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(truncate:V4HI
  (lshiftrt:V4SI
(mult:V4SI
  (zero_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yv"))
  (zero_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "nonimmediate_operand" "ym,x,Yv")))
  (const_int 16))))]
-  "(TARGET_SSE || TARGET_3DNOW_A)
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmulhuw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "@
+   pmulhuw\t{%2, %0|%0, %2}
+   pmulhuw\t{%2, %0|%0, %2}
+   vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pmaddwd"
   [(set (match_operand:V2SI 0 "register_operand")
-- 
2.20.1



[PATCH 17/42] i386: Emulate MMX mmx_pextrw with SSE

2019-02-15 Thread H.J. Lu
Emulate MMX mmx_pextrw with SSE.  Only an SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_pextrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 3ea64e9aabe..678eaa713dc 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1310,16 +1310,18 @@
(set_attr "mode" "DI")])
 
 (define_insn "mmx_pextrw"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
 (zero_extend:SI
  (vec_select:HI
-   (match_operand:V4HI 1 "register_operand" "y")
-   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "mmxcvt")
+   (match_operand:V4HI 1 "register_operand" "y,Yv")
+   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]))))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && (TARGET_SSE || TARGET_3DNOW_A)"
+  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog1")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_pshufw"
   [(match_operand:V4HI 0 "register_operand")
-- 
2.20.1



  1   2   >