https://gcc.gnu.org/g:91d79053f2b416cb9e97d9c0c3fb5b73075289e6

commit r15-876-g91d79053f2b416cb9e97d9c0c3fb5b73075289e6
Author: Uros Bizjak <ubiz...@gmail.com>
Date:   Tue May 28 20:25:14 2024 +0200

    i386: Improve access to _Atomic DImode location via XMM regs for SSE4.1 x86_32 targets
    
    Use MOVD/PEXTRD and MOVD/PINSRD insn sequences to move DImode value
    between XMM and GPR register sets for SSE4.1 x86_32 targets in order
    to avoid spilling the value to stack.
    
    The load from _Atomic location a improves from:
    
            movq    a, %xmm0
            movq    %xmm0, (%esp)
            movl    (%esp), %eax
            movl    4(%esp), %edx
    
    to:
            movq    a, %xmm0
            movd    %xmm0, %eax
            pextrd  $1, %xmm0, %edx
    
    The store to _Atomic location b improves from:
    
            movl    %eax, (%esp)
            movl    %edx, 4(%esp)
            movq    (%esp), %xmm0
            movq    %xmm0, b
    
    to:
            movd    %eax, %xmm0
            pinsrd  $1, %edx, %xmm0
            movq    %xmm0, b
    
    gcc/ChangeLog:
    
            * config/i386/sync.md (atomic_loaddi_fpu): Use movd/pextrd
            to move DImode value from XMM to GPR for TARGET_SSE4_1.
            (atomic_storedi_fpu): Use movd/pinsrd to move DImode value
            from GPR to XMM for TARGET_SSE4_1.

Diff:
---
 gcc/config/i386/sync.md | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 8317581ebe2..f2b3ba0aa7a 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -215,8 +215,18 @@
        }
       else
        {
+         rtx tmpdi = gen_lowpart (DImode, tmp);
+
          emit_insn (gen_loaddi_via_sse (tmp, src));
-         emit_insn (gen_storedi_via_sse (mem, tmp));
+
+         if (GENERAL_REG_P (dst)
+             && TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_FROM_VEC)
+           {
+             emit_move_insn (dst, tmpdi);
+             DONE;
+           }
+         else
+           emit_move_insn (mem, tmpdi);
        }
 
       if (mem != dst)
@@ -294,20 +304,30 @@
     emit_move_insn (dst, src);
   else
     {
-      if (REG_P (src))
-       {
-         emit_move_insn (mem, src);
-         src = mem;
-       }
-
       if (STACK_REG_P (tmp))
        {
+         if (GENERAL_REG_P (src))
+           {
+             emit_move_insn (mem, src);
+             src = mem;
+           }
+
          emit_insn (gen_loaddi_via_fpu (tmp, src));
          emit_insn (gen_storedi_via_fpu (dst, tmp));
        }
       else
        {
-         emit_insn (gen_loaddi_via_sse (tmp, src));
+         rtx tmpdi = gen_lowpart (DImode, tmp);
+
+         if (GENERAL_REG_P (src)
+             && !(TARGET_SSE4_1 && TARGET_INTER_UNIT_MOVES_TO_VEC))
+           {
+             emit_move_insn (mem, src);
+             src = mem;
+           }
+
+         emit_move_insn (tmpdi, src);
+
          emit_insn (gen_storedi_via_sse (dst, tmp));
        }
     }

Reply via email to