https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89021

--- Comment #5 from H.J. Lu <hjl.tools at gmail dot com> ---
Most MMX instructions can be implemented with SSE/SSE2.  There are
a couple of tricky cases:

1. MMX maskmovq vs SSE2 maskmovdqu.  They aren't equivalent.  I have

;; MMX maskmovq implemented with SSE2 maskmovdqu; split after reload.
(define_insn_and_split "sse2_maskmovq_<mode>"
  [(set (mem:V8QI (match_operand:P 0 "register_operand" "D"))
        (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "Yy")
                      (match_operand:V8QI 2 "register_operand" "Yy")
                      (mem:V8QI (match_dup 0))]
                     UNSPEC_MASKMOV))
   (set (match_operand:V2DI 3 "register_operand" "=Yy")
        (unspec:V2DI [(match_operand:V2DI 4 "register_operand" "3")]
                     UNSPEC_MASKMOV))]
  "TARGET_MMX_WITH_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* Step 1: move the low 64 bits of operand 2 into operand 3, viewing
     operand 2's hard register in V2DImode.
     NB: an invalid memory access may happen when bits 64:127 at the
     memory location are unmapped.  */
  rtx op2_v2di = gen_rtx_REG (V2DImode, REGNO (operands[2]));
  emit_insn (gen_sse2_movq128 (operands[3], op2_v2di));

  /* Step 2: emit SSE2 maskmovdqu using operand 1 and operand 3, both
     viewed in V16QImode.  The generator is chosen by pointer mode.  */
  rtx op1_v16qi = gen_rtx_REG (V16QImode, REGNO (operands[1]));
  rtx op3_v16qi = gen_rtx_REG (V16QImode, REGNO (operands[3]));
  rtx maskmov
    = (Pmode == SImode
       ? gen_sse2_maskmovdqu_si (operands[0], op1_v16qi, op3_v16qi)
       : gen_sse2_maskmovdqu_di (operands[0], op1_v16qi, op3_v16qi));
  emit_insn (maskmov);
  DONE;
}
  [(set_attr "type" "ssemov")
   (set_attr "znver1_decode" "vector")
   (set_attr "mode" "TI")])

2. MMX movntq vs SSE2 movntidi, which is only for 64-bit mode.  I have

;; MMX movntq expander; uses SSE2 movnti (DImode) under TARGET_MMX_WITH_SSE.
(define_expand "sse_movntq"
  [(set (match_operand:DI 0 "memory_operand")
        (unspec:DI [(match_operand:DI 1 "register_operand")]
                   UNSPEC_MOVNTQ))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  /* When MMX is implemented with SSE, emit the SSE2 DImode
     non-temporal store and finish; otherwise fall through to the
     pattern in the template above.  */
  if (TARGET_MMX_WITH_SSE)
    {
      emit_insn (gen_sse2_movntidi (operands[0], operands[1]));
      DONE;
    }
})

3. MMX pshufb vs SSE pshufb.  I have

;; MMX (64-bit) pshufb expander.  Under TARGET_MMX_WITH_SSE it is emulated
;; with the 128-bit SSE pshufb via ssse3_pshufbv8qi3_sse.
(define_expand "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand")
        (unspec:V8QI [(match_operand:V8QI 1 "register_operand")
                      (match_operand:V8QI 2 "nonimmediate_operand")]
                     UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
{
  if (TARGET_MMX_WITH_SSE)
    {
      /* Emulate MMX version of pshufb with SSE version by masking
         out the bit 3 of the shuffle control byte.

         The mask elements are SImode and 0xf7f7f7f7 has bit 31 set,
         so build them with gen_int_mode: a CONST_INT must be
         sign-extended for its mode, which a bare GEN_INT of this
         value is not when HOST_WIDE_INT is wider than 32 bits.  */
      rtx mask_elt = gen_int_mode (0xf7f7f7f7, SImode);
      rtvec par = gen_rtvec (4, mask_elt, mask_elt, mask_elt, mask_elt);
      rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
      vec_const = force_const_mem (V4SImode, vec_const);
      /* op3 is the output of the helper pattern; op4 is its input,
         tied to op3 by constraint, and is loaded with the mask.  */
      rtx op3 = gen_reg_rtx (V4SImode);
      rtx op4 = gen_reg_rtx (V4SImode);
      rtx insn = gen_rtx_SET (op4, vec_const);
      emit_insn (insn);
      /* The helper pattern only accepts a register for operand 2.  */
      rtx op2 = force_reg (V8QImode, operands[2]);
      insn = gen_ssse3_pshufbv8qi3_sse (operands[0], operands[1],
                                        op2, op3, op4);
      emit_insn (insn);
      DONE;
    }
})

and

;; Helper for ssse3_pshufbv8qi3 under TARGET_MMX_WITH_SSE; split after
;; reload into an AND of the control mask followed by SSE pshufb.
(define_insn_and_split "ssse3_pshufbv8qi3_sse"
  [(set (match_operand:V8QI 0 "register_operand" "=Yx,Yy")
        (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,Yy")
                      (match_operand:V8QI 2 "register_operand" "Yx,Yy")]
                     UNSPEC_PSHUFB))
   (set (match_operand:V4SI 3 "register_operand" "=Yx,Yy")
        (unspec:V4SI [(match_operand:V4SI 4 "register_operand" "3,3")]
                     UNSPEC_PSHUFB))]
  "TARGET_SSSE3 && TARGET_MMX_WITH_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* Clear bit 3 of each shuffle control byte: AND operand 3 with
     operand 2 (viewed in V4SImode), leaving the result in operand 3.  */
  rtx op2_v4si = gen_rtx_REG (V4SImode, REGNO (operands[2]));
  emit_insn (gen_andv4si3 (operands[3], operands[3], op2_v4si));

  /* Emit the SSE version of pshufb on the V16QImode views of
     operands 0, 1 and 3.  */
  rtx op0_v16qi = gen_rtx_REG (V16QImode, REGNO (operands[0]));
  rtx op1_v16qi = gen_rtx_REG (V16QImode, REGNO (operands[1]));
  rtx op3_v16qi = gen_rtx_REG (V16QImode, REGNO (operands[3]));
  emit_insn (gen_ssse3_pshufbv16qi3 (op0_v16qi, op1_v16qi, op3_v16qi));
  DONE;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "mode" "TI,TI")])

Reply via email to