On Sat, Feb 09, 2019 at 10:56:38AM +0100, Jakub Jelinek wrote:
> On Sat, Feb 09, 2019 at 10:50:43AM +0100, Uros Bizjak wrote:
> > > Also need this patch since we no longer set MODE_XI for
> > > AVX512VL.
> > 
> > No. Please figure out correct condition to set mode attribute to XImode 
> > instead.
> 
> If it is AVX512VL, isn't MODE_OI or MODE_TI correct in those cases though?
> While the instructions need EVEX encoding if they have [xy]mm{16,...31}
> operands, they operate just on 256 or 128 bits.

That said, mov{oi,ti}_internal is severely broken for avx512f without
avx512vl even after this patch.

I think the following patch, incremental to H.J.'s patch, should fix that.
It is pretty much a copy of what sse.md (*mov<mode>_internal) pattern does,
just specialized to the particular instructions (i.e. that it is integral,
not floating, and always 32-byte or always 16-byte).  sse.md has:
      /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
         in avx512f, so we need to use workarounds, to access sse registers
         16-31, which are evex-only. In avx512vl we don't need workarounds.  */
      if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
          && (EXT_REX_SSE_REG_P (operands[0])
              || EXT_REX_SSE_REG_P (operands[1])))
        {
          if (memory_operand (operands[0], <MODE>mode))
            {
              if (<MODE_SIZE> == 32)
                return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
              else if (<MODE_SIZE> == 16)
                return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
              else
                gcc_unreachable ();
            }
          else if (memory_operand (operands[1], <MODE>mode))
            {
              if (<MODE_SIZE> == 32)
                return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
              else if (<MODE_SIZE> == 16)
                return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
              else
                gcc_unreachable ();
            }
          else
            /* Reg -> reg move is always aligned.  Just use wider move.  */
            switch (get_attr_mode (insn))
              {
              case MODE_V8SF:
              case MODE_V4SF:
                return "vmovaps\t{%g1, %g0|%g0, %g1}";
              case MODE_V4DF:
              case MODE_V2DF:
                return "vmovapd\t{%g1, %g0|%g0, %g1}";
              case MODE_OI:
              case MODE_TI:
                return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
              default:
                gcc_unreachable ();
              }
        }
before it tries to handle the normal cases.  Ok for trunk if it passes
bootstrap/regtest?

2019-02-09  Jakub Jelinek  <ja...@redhat.com>

        PR target/89229
        * config/i386/i386.md (*movoi_internal_avx, *movti_internal): Handle
        MODE_XI properly.

--- gcc/config/i386/i386.md.jj  2019-02-09 11:18:53.995450055 +0100
+++ gcc/config/i386/i386.md     2019-02-09 11:26:04.364342306 +0100
@@ -1905,6 +1905,18 @@ (define_insn "*movoi_internal_avx"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
+      /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
+        in avx512f, so we need to use workarounds to access sse registers
+        16-31, which are evex-only. In avx512vl we don't need workarounds.  */
+      if (get_attr_mode (insn) == MODE_XI)
+       {
+         if (memory_operand (operands[0], OImode))
+           return "vextracti64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
+         else if (memory_operand (operands[1], OImode))
+           return "vbroadcasti64x4\t{%1, %g0|%g0, %1}";
+         else
+           return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
+       }
       if (misaligned_operand (operands[0], OImode)
          || misaligned_operand (operands[1], OImode))
        {
@@ -1968,6 +1980,18 @@ (define_insn "*movti_internal"
       return standard_sse_constant_opcode (insn, operands);
 
     case TYPE_SSEMOV:
+      /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
+        in avx512f, so we need to use workarounds to access sse registers
+        16-31, which are evex-only. In avx512vl we don't need workarounds.  */
+      if (get_attr_mode (insn) == MODE_XI)
+       {
+         if (memory_operand (operands[0], TImode))
+           return "vextracti32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
+         else if (memory_operand (operands[1], TImode))
+           return "vbroadcasti32x4\t{%1, %g0|%g0, %1}";
+         else
+           return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
+       }
       /* TDmode values are passed as TImode on the stack.  Moving them
         to stack may result in unaligned memory access.  */
       if (misaligned_operand (operands[0], TImode)


        Jakub

Reply via email to