https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89021
--- Comment #5 from H.J. Lu <hjl.tools at gmail dot com> --- Most MMX instructions can be implemented with SSE/SSE2. There are a couple of tricky cases: 1. MMX maskmovq vs SSE2 maskmovdqu. They aren't equivalent. I have (define_insn_and_split "sse2_maskmovq_<mode>" [(set (mem:V8QI (match_operand:P 0 "register_operand" "D")) (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "Yy") (match_operand:V8QI 2 "register_operand" "Yy") (mem:V8QI (match_dup 0))] UNSPEC_MASKMOV)) (set (match_operand:V2DI 3 "register_operand" "=Yy") (unspec:V2DI [(match_operand:V2DI 4 "register_operand" "3")] UNSPEC_MASKMOV))] "TARGET_MMX_WITH_SSE" "#" "&& reload_completed" [(const_int 0)] { /* Copy the lower 64 bits of operand 2 to operand 3. NB: Invalid memory access may happen when bits 64:127 at memory location are unmapped. */ rtx op3 = operands[3]; rtx op2 = gen_rtx_REG (V2DImode, REGNO (operands[2])); rtx insn = gen_sse2_movq128 (op3, op2); emit_insn (insn); /* Generate SSE2 maskmovdqu with operand 3. */ rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1])); op3 = gen_rtx_REG (V16QImode, REGNO (operands[3])); if (Pmode == SImode) insn = gen_sse2_maskmovdqu_si (operands[0], op1, op3); else insn = gen_sse2_maskmovdqu_di (operands[0], op1, op3); emit_insn (insn); DONE; } [(set_attr "type" "ssemov") (set_attr "znver1_decode" "vector") (set_attr "mode" "TI")]) 2. MMX movntq vs SSE2 movntidi, which is only for 64-bit mode. I have (define_expand "sse_movntq" [(set (match_operand:DI 0 "memory_operand") (unspec:DI [(match_operand:DI 1 "register_operand")] UNSPEC_MOVNTQ))] "TARGET_SSE || TARGET_3DNOW_A" { if (TARGET_MMX_WITH_SSE) { rtx insn = gen_sse2_movntidi (operands[0], operands[1]); emit_insn (insn); DONE; } }) 3. MMX pshufb vs SSE pshufb. 
I have (define_expand "ssse3_pshufbv8qi3" [(set (match_operand:V8QI 0 "register_operand") (unspec:V8QI [(match_operand:V8QI 1 "register_operand") (match_operand:V8QI 2 "nonimmediate_operand")] UNSPEC_PSHUFB))] "TARGET_SSSE3" { if (TARGET_MMX_WITH_SSE) { /* Emulate MMX version of pshufb with SSE version by masking out the bit 3 of the shuffle control byte. */ rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7), GEN_INT (0xf7f7f7f7), GEN_INT (0xf7f7f7f7), GEN_INT (0xf7f7f7f7)); rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par); vec_const = force_const_mem (V4SImode, vec_const); rtx op3 = gen_reg_rtx (V4SImode); rtx op4 = gen_reg_rtx (V4SImode); rtx insn = gen_rtx_SET (op4, vec_const); emit_insn (insn); rtx op2 = force_reg (V8QImode, operands[2]); insn = gen_ssse3_pshufbv8qi3_sse (operands[0], operands[1], op2, op3, op4); emit_insn (insn); DONE; } }) and (define_insn_and_split "ssse3_pshufbv8qi3_sse" [(set (match_operand:V8QI 0 "register_operand" "=Yx,Yy") (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,Yy") (match_operand:V8QI 2 "register_operand" "Yx,Yy")] UNSPEC_PSHUFB)) (set (match_operand:V4SI 3 "register_operand" "=Yx,Yy") (unspec:V4SI [(match_operand:V4SI 4 "register_operand" "3,3")] UNSPEC_PSHUFB))] "TARGET_SSSE3 && TARGET_MMX_WITH_SSE" "#" "&& reload_completed" [(const_int 0)] { /* Mask out the bit 3 of the shuffle control byte. */ rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2])); rtx op3 = operands[3]; rtx insn = gen_andv4si3 (op3, op3, op2); emit_insn (insn); /* Generate SSE version of pshufb. */ rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0])); rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1])); op3 = gen_rtx_REG (V16QImode, REGNO (op3)); insn = gen_ssse3_pshufbv16qi3 (op0, op1, op3); emit_insn (insn); DONE; } [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog1") (set_attr "mode" "TI,TI")])