Emulate MMX maskmovq with SSE2 maskmovdqu by zeroing out the upper 64
bits of the mask operand (an illustrative intrinsics sketch of this idea
follows the patch).  A warning is issued, since an invalid memory access
may happen when bits 64:127 at the memory location are unmapped:
xmmintrin.h:1168:3: note: Emulate MMX maskmovq with SSE2 maskmovdqu may result in invalid memory access
 1168 |   __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
      |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Only an SSE register source operand is allowed.

	PR target/89021
	* config/i386/mmx.md (mmx_maskmovq): Emulate MMX maskmovq with
	SSE2 maskmovdqu and a warning.
	(sse2_maskmovq_<mode>): New.
	(*mmx_maskmovq): Add "&& !TARGET_MMX_WITH_SSE".
	* config/i386/sse.md (*sse2_maskmovdqu): Renamed to ...
	(sse2_maskmovdqu_<mode>): This.
---
 gcc/config/i386/mmx.md | 59 ++++++++++++++++++++++++++++++++++++++++--
 gcc/config/i386/sse.md |  2 +-
 2 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f90574a7255..92252984482 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1748,7 +1748,62 @@
 	  (match_operand:V8QI 2 "register_operand")
 	  (match_dup 0)]
 	 UNSPEC_MASKMOV))]
-  "TARGET_SSE || TARGET_3DNOW_A")
+  "TARGET_SSE || TARGET_3DNOW_A"
+{
+  if (TARGET_MMX_WITH_SSE)
+    {
+      /* Emulate MMX maskmovq with SSE2 maskmovdqu and issue a warning
+	 since they aren't equivalent.  */
+      inform (input_location, "Emulate MMX maskmovq with SSE2 maskmovdqu "
+	      "may result in invalid memory access");
+      rtx insn;
+      rtx op = gen_reg_rtx (V2DImode);
+      if (Pmode == SImode)
+	insn = gen_sse2_maskmovq_si (XEXP (operands[0], 0),
+				     operands[1], operands[2], op, op);
+      else
+	insn = gen_sse2_maskmovq_di (XEXP (operands[0], 0),
+				     operands[1], operands[2], op, op);
+      emit_insn (insn);
+      DONE;
+    }
+})
+
+(define_insn_and_split "sse2_maskmovq_<mode>"
+  [(set (mem:V8QI (match_operand:P 0 "register_operand" "D"))
+	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "Yy")
+		      (match_operand:V8QI 2 "register_operand" "Yy")
+		      (mem:V8QI (match_dup 0))]
+		     UNSPEC_MASKMOV))
+   (set (match_operand:V2DI 3 "register_operand" "=Yy")
+	(unspec:V2DI [(match_operand:V2DI 4 "register_operand" "3")]
+		     UNSPEC_MASKMOV))]
+  "TARGET_MMX_WITH_SSE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  /* Copy the lower 64 bits of operand 2 (the mask operand) to
+     operand 3.  NB: Invalid memory access may happen when bits
+     64:127 at the memory location are unmapped.  */
+  rtx op3 = operands[3];
+  rtx op2 = gen_rtx_REG (V2DImode, REGNO (operands[2]));
+  rtx insn = gen_sse2_movq128 (op3, op2);
+  emit_insn (insn);
+
+  /* Generate SSE2 maskmovdqu with operand 3.  */
+  rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]));
+  op3 = gen_rtx_REG (V16QImode, REGNO (operands[3]));
+  if (Pmode == SImode)
+    insn = gen_sse2_maskmovdqu_si (operands[0], op1, op3);
+  else
+    insn = gen_sse2_maskmovdqu_di (operands[0], op1, op3);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "znver1_decode" "vector")
+   (set_attr "mode" "TI")])
 
 (define_insn "*mmx_maskmovq"
   [(set (mem:V8QI (match_operand:P 0 "register_operand" "D"))
@@ -1756,7 +1811,7 @@
 	  (match_operand:V8QI 2 "register_operand" "y")
 	  (mem:V8QI (match_dup 0))]
 	 UNSPEC_MASKMOV))]
-  "TARGET_SSE || TARGET_3DNOW_A"
+  "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_MMX_WITH_SSE"
   ;; @@@ check ordering of operands in intel/nonintel syntax
   "maskmovq\t{%2, %1|%1, %2}"
   [(set_attr "type" "mmxcvt")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 9ecd9789c1e..7218c9cd646 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15142,7 +15142,7 @@
 		     UNSPEC_MASKMOV))]
   "TARGET_SSE2")
 
-(define_insn "*sse2_maskmovdqu"
+(define_insn "sse2_maskmovdqu_<mode>"
   [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
 		       (match_operand:V16QI 2 "register_operand" "x")
-- 
2.20.1
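
For reference, here is a minimal user-level sketch of the same emulation
idea in C intrinsics.  It is not part of the patch; the function name
maskmovq_emulated is made up, and it assumes an SSE2 target (e.g. built
with -msse2, or on x86-64).  Like the split above, it relies on zero
extension to clear the upper 64 mask bits before using maskmovdqu:

#include <emmintrin.h>

/* Illustrative only; approximates at the source level what
   sse2_maskmovq_<mode> emits at the RTL level.  */
static void
maskmovq_emulated (long long data, long long mask, char *addr)
{
  /* Zero-extend both 64-bit values into the low halves of XMM
     registers; the upper 64 mask bits are zero, so maskmovdqu never
     stores to addr[8..15].  */
  __m128i d = _mm_set_epi64x (0, data);
  __m128i m = _mm_set_epi64x (0, mask);

  /* NB: maskmovdqu still performs a 16-byte access, so it can fault
     when addr[8..15] is unmapped even though those bytes are never
     written; this is the hazard the new diagnostic warns about.  */
  _mm_maskmoveu_si128 (d, m, addr);
}

This also motivates the use of sse2_movq128 in the split: movq's
implicit zero extension guarantees that the upper half of the mask
register is clear, confining stores to the low 8 bytes.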