On Sat, Feb 09, 2019 at 10:56:38AM +0100, Jakub Jelinek wrote:
> On Sat, Feb 09, 2019 at 10:50:43AM +0100, Uros Bizjak wrote:
> > > Also need this patch since we no longer set MODE_XI for
> > > AVX512VL.
> >
> > No. Please figure out correct condition to set mode attribute to XImode
> > instead.
>
> If it is AVX512VL, isn't MODE_OI or MODE_TI correct in those cases though?
> While the instructions need EVEX encoding if they have [xy]mm{16,...31}
> operands, they operate just on 256 or 128 bits.

That said, mov{oi,ti}_internal is severely broken for avx512f without
avx512vl even after this patch.

I think the following patch, incremental to H.J.'s patch, should fix that.
It is pretty much a copy of what the sse.md (*mov<mode>_internal) pattern
does, just specialized to the particular instructions (i.e. that it is
integral, not floating, and always 32-byte or always 16-byte).

sse.md has:

      /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
         in avx512f, so we need to use workarounds, to access sse registers
         16-31, which are evex-only. In avx512vl we don't need workarounds.  */
      if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
          && (EXT_REX_SSE_REG_P (operands[0])
              || EXT_REX_SSE_REG_P (operands[1])))
        {
          if (memory_operand (operands[0], <MODE>mode))
            {
              if (<MODE_SIZE> == 32)
                return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
              else if (<MODE_SIZE> == 16)
                return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
              else
                gcc_unreachable ();
            }
          else if (memory_operand (operands[1], <MODE>mode))
            {
              if (<MODE_SIZE> == 32)
                return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
              else if (<MODE_SIZE> == 16)
                return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
              else
                gcc_unreachable ();
            }
          else
            /* Reg -> reg move is always aligned.  Just use wider move.  */
            switch (get_attr_mode (insn))
              {
              case MODE_V8SF:
              case MODE_V4SF:
                return "vmovaps\t{%g1, %g0|%g0, %g1}";
              case MODE_V4DF:
              case MODE_V2DF:
                return "vmovapd\t{%g1, %g0|%g0, %g1}";
              case MODE_OI:
              case MODE_TI:
                return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
              default:
                gcc_unreachable ();
              }
        }

before it tries to handle the normal cases.

Ok for trunk if it passes bootstrap/regtest?

2019-02-09  Jakub Jelinek  <ja...@redhat.com>

        PR target/89229
        * config/i386/i386.md (*movoi_internal_avx, *movti_internal): Handle
        MODE_XI properly.

--- gcc/config/i386/i386.md.jj 2019-02-09 11:18:53.995450055 +0100 +++ gcc/config/i386/i386.md 2019-02-09 11:26:04.364342306 +0100 @@ -1905,6 +1905,18 @@ (define_insn "*movoi_internal_avx" return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: + /* There is no evex-encoded vmov* for sizes smaller than 64-bytes + in avx512f, so we need to use workarounds to access sse registers + 16-31, which are evex-only. In avx512vl we don't need workarounds. */ + if (get_attr_mode (insn) == MODE_XI) + { + if (memory_operand (operands[0], OImode)) + return "vextracti64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}"; + else if (memory_operand (operands[1], OImode)) + return "vbroadcasti64x4\t{%1, %g0|%g0, %1}"; + else + return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; + } if (misaligned_operand (operands[0], OImode) || misaligned_operand (operands[1], OImode)) { @@ -1968,6 +1980,18 @@ (define_insn "*movti_internal" return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: + /* There is no evex-encoded vmov* for sizes smaller than 64-bytes + in avx512f, so we need to use workarounds to access sse registers + 16-31, which are evex-only. In avx512vl we don't need workarounds. */ + if (get_attr_mode (insn) == MODE_XI) + { + if (memory_operand (operands[0], TImode)) + return "vextracti32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}"; + else if (memory_operand (operands[1], TImode)) + return "vbroadcasti32x4\t{%1, %g0|%g0, %1}"; + else + return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; + } /* TDmode values are passed as TImode on the stack. Moving them to stack may result in unaligned memory access. */ if (misaligned_operand (operands[0], TImode) Jakub