https://gcc.gnu.org/g:8a38c4404955e793159d07d7b65f780545f3100d

commit r16-6236-g8a38c4404955e793159d07d7b65f780545f3100d
Author: chenxiaolong <[email protected]>
Date:   Thu Dec 11 10:49:05 2025 +0800

    LoongArch: Add vector mode support to TARGET_MODES_TIEABLE_P.
    
    v1->v2:
    Add a description of the TARGET_MODES_TIEABLE_P hook and explain why
    the cost of subreg rtxes changes once vector modes are supported.
    
    This hook returns true if a value of mode mode1 is accessible in mode
    mode2 without copying.  On LoongArch (LA), for the vector modes V4SF
    and V8SF, the lower 128 bits of data can be shared.  After adding
    vector support to this hook, the cost of the subreg type conversion
    from a V4SF register to a V8SF register can be made zero, and some rtx
    optimizations can then be completed during the combine pass.  The RTL
    before and after this change compares as follows:
    
    Before this change:
    
    (insn 7 4 9 2 (set (reg:V8SF 82 [ _6 ])
            (subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0))
    (insn 9 7 10 2 (set (reg:V8SF 80 [ _4 ])
            (plus:V8SF (reg:V8SF 82 [ _6 ])
                (reg:V8SF 82 [ _6 ])))
    
    ===>
    
    After this change:
    
    (insn 9 7 10 2 (set (reg:V8SF 80 [ _4 ])
            (plus:V8SF (subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0)
                (subreg:V8SF (reg:V4SF 86 [ aD.7906 ]) 0)))
    
    gcc/ChangeLog:
    
            * config/loongarch/loongarch.cc (loongarch_modes_tieable_p):
            Add support for vector conversion.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/loongarch/vector/lasx/vect-extract-256-128.c:
            Adjust the expected output.  Now that the backend supports the
            vector modes used by the subreg, the cost of the subreg rtx
            becomes 0.  In the fwprop1 pass, the rtx loaded from memory
            cannot be propagated into this insn, so xvld is no longer
            optimized into a vld instruction.
            * gcc.target/loongarch/vect-mode-tieable.c: New test.

Diff:
---
 gcc/config/loongarch/loongarch.cc                  |  6 ++-
 .../gcc.target/loongarch/vect-mode-tieable.c       | 47 ++++++++++++++++++++++
 .../loongarch/vector/lasx/vect-extract-256-128.c   |  6 +--
 3 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index cc0523f2ab72..06d0dcc5efda 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -7514,7 +7514,11 @@ loongarch_modes_tieable_p (machine_mode mode1, machine_mode mode2)
          || (GET_MODE_CLASS(mode1) == MODE_FLOAT
              && GET_MODE_CLASS(mode2) == MODE_INT)
          || (GET_MODE_CLASS(mode2) == MODE_FLOAT
-             && GET_MODE_CLASS(mode1) == MODE_INT));
+             && GET_MODE_CLASS(mode1) == MODE_INT)
+         || (GET_MODE_CLASS (mode1) == MODE_VECTOR_INT
+             && GET_MODE_CLASS (mode2) == MODE_VECTOR_INT)
+         || (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
+             &&  GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT));
 }
 
 /* Implement TARGET_PREFERRED_RELOAD_CLASS.  */
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c b/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c
new file mode 100644
index 000000000000..d156f92761d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vect-mode-tieable.c
@@ -0,0 +1,47 @@
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-mabi=lp64d -O2 -mlasx" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <lasxintrin.h>
+
+/*
+**foo1:
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r5,0
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r6,1
+**     xvadd.d (\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+**     xvst    (\$xr[0-9]+),\$r4,0
+**     jr      \$r1
+*/
+__m256i
+foo1 (__m128i a)
+{
+  return __lasx_xvadd_d (__lasx_cast_128 (a), __lasx_cast_128 (a));
+}
+
+/*
+**foo2:
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r5,0
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r6,1
+**     xvfadd.s        (\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+**     xvst    (\$xr[0-9]+),\$r4,0
+**     jr      \$r1
+*/
+__m256
+foo2 (__m128 a)
+{
+  return __lasx_xvfadd_s (__lasx_cast_128_s (a), __lasx_cast_128_s (a));
+}
+
+/*
+**foo3:
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r5,0
+**     vinsgr2vr.d     (\$vr[0-9]+),\$r6,1
+**     xvfadd.d        (\$xr[0-9]+),(\$xr[0-9]+),(\$xr[0-9]+)
+**     xvst    (\$xr[0-9]+),\$r4,0
+**     jr      \$r1
+*/
+__m256d
+foo3 (__m128d a)
+{
+  return __lasx_xvfadd_d (__lasx_cast_128_d (a), __lasx_cast_128_d (a));
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
index d2219ea82de2..bdf6e160ace5 100644
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
@@ -6,7 +6,7 @@
 
 /*
 **foo1_lo:
-**     vld     (\$vr[0-9]+),\$r4,0
+**     xvld    (\$xr[0-9]+),\$r4,0
 **     vpickve2gr.du   \$r4,(\$vr[0-9]+),0
 **     vpickve2gr.du   \$r5,(\$vr[0-9]+),1
 **     jr      \$r1
@@ -33,7 +33,7 @@ foo1_hi (__m256 x)
 
 /*
 **foo2_lo:
-**     vld     (\$vr[0-9]+),\$r4,0
+**     xvld    (\$xr[0-9]+),\$r4,0
 **     vpickve2gr.du   \$r4,(\$vr[0-9]+),0
 **     vpickve2gr.du   \$r5,(\$vr[0-9]+),1
 **     jr      \$r1
@@ -60,7 +60,7 @@ foo2_hi (__m256d x)
 
 /*
 **foo3_lo:
-**     vld     (\$vr[0-9]+),\$r4,0
+**     xvld    (\$xr[0-9]+),\$r4,0
 **     vpickve2gr.du   \$r4,(\$vr[0-9]+),0
 **     vpickve2gr.du   \$r5,(\$vr[0-9]+),1
 **     jr      \$r1

Reply via email to