------- Comment #2 from ubizjak at gmail dot com  2008-05-12 19:23 -------
(In reply to comment #1)
> Also do we need "movq    %xmm1, %xmm2"?

We can help the register allocator (RA) a bit by emitting an RTL sequence that requires fewer pseudos.

Index: i386.c
===================================================================
--- i386.c      (revision 135220)
+++ i386.c      (working copy)
@@ -23859,14 +23859,14 @@
        /* For V4SF and V4SI, we implement a concat of two V2 vectors.
           Recurse to load the two halves.  */

+       op1 = gen_reg_rtx (half_mode);
+       v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
+       ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
+
        op0 = gen_reg_rtx (half_mode);
        v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
        ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));

-       op1 = gen_reg_rtx (half_mode);
-       v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
-       ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
-
        use_vec_concat = true;
       }
       break;
@@ -23883,10 +23883,10 @@

   if (use_vec_concat)
     {
+      if (!register_operand (op1, half_mode))
+       op1 = force_reg (half_mode, op1);
       if (!register_operand (op0, half_mode))
        op0 = force_reg (half_mode, op0);
-      if (!register_operand (op1, half_mode))
-       op1 = force_reg (half_mode, op1);

       emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));


This patch will produce:

_mm_set_epi32:
.LFB2:
        movd    %edi, %xmm0
        movd    %esi, %xmm1
        movd    %edx, %xmm2
        punpckldq       %xmm0, %xmm1
        movd    %ecx, %xmm0
        punpckldq       %xmm2, %xmm0
        punpcklqdq      %xmm1, %xmm0
        ret


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36222

Reply via email to