gcc/ChangeLog:
	* config/loongarch/lasx.md (vec_cast<mode>): New template
	implementation.
	(vec_insert_lo_<mode>): Ditto.
	(vec_insert_hi_<mode>): Ditto.
	* config/loongarch/lasxintrin.h (defined): Test for adding
	the builtin function.
	(__lasx_cast_128_s): Ditto.
	(__lasx_cast_128_d): Ditto.
	(__lasx_cast_128): Ditto.
	(__lasx_concat_128_s): Ditto.
	(__lasx_concat_128_d): Ditto.
	(__lasx_concat_128): Ditto.
	(__lasx_extract_128_lo_s): Ditto.
	(__lasx_extract_128_hi_s): Ditto.
	(__lasx_extract_128_lo_d): Ditto.
	(__lasx_extract_128_hi_d): Ditto.
	(__lasx_extract_128_lo): Ditto.
	(__lasx_extract_128_hi): Ditto.
	(__lasx_insert_128_lo_s): Ditto.
	(__lasx_insert_128_hi_s): Ditto.
	(__lasx_insert_128_lo_d): Ditto.
	(__lasx_insert_128_hi_d): Ditto.
	(__lasx_insert_128_lo): Ditto.
	(__lasx_insert_128_hi): Ditto.
* config/loongarch/loongarch-builtins.cc
(CODE_FOR_lasx_extract_128_lo_s): Add builtins and register
icode.
	(CODE_FOR_lasx_extract_128_hi_s): Ditto.
	(CODE_FOR_lasx_extract_128_lo_d): Ditto.
	(CODE_FOR_lasx_extract_128_hi_d): Ditto.
	(CODE_FOR_lasx_extract_128_lo): Ditto.
	(CODE_FOR_lasx_extract_128_hi): Ditto.
	(CODE_FOR_lasx_insert_128_lo_s): Ditto.
	(CODE_FOR_lasx_insert_128_hi_s): Ditto.
	(CODE_FOR_lasx_insert_128_lo_d): Ditto.
	(CODE_FOR_lasx_insert_128_hi_d): Ditto.
	(CODE_FOR_lasx_insert_128_lo): Ditto.
	(CODE_FOR_lasx_insert_128_hi): Ditto.
	(CODE_FOR_lasx_concat_128_s): Ditto.
	(CODE_FOR_lasx_concat_128_d): Ditto.
	(CODE_FOR_lasx_concat_128): Ditto.
	(CODE_FOR_lasx_cast_128_s): Ditto.
	(CODE_FOR_lasx_cast_128_d): Ditto.
	(CODE_FOR_lasx_cast_128): Ditto.
(loongarch_expand_builtin_direct): For the newly added
insertion or extraction, construct the parallel parameter
corresponding to the operand.
* config/loongarch/loongarch-c.cc
(loongarch_update_cpp_builtins): Define
__loongarch_asx_sx_conv.
* config/loongarch/loongarch-ftypes.def: Declare the type
of the builtin function.
* doc/extend.texi: Add document description.

gcc/testsuite/ChangeLog:
* gcc.target/loongarch/vector/lasx/vect-concat-128-256-result.c: New
test.
* gcc.target/loongarch/vector/lasx/vect-concat-128-256.c: New test.
* gcc.target/loongarch/vector/lasx/vect-extract-256-128-result.c: New
test.
* gcc.target/loongarch/vector/lasx/vect-extract-256-128.c: New test.
* gcc.target/loongarch/vector/lasx/vect-insert-128-256-result.c: New
test.
* gcc.target/loongarch/vector/lasx/vect-insert-128-256.c: New test.
Change-Id: I44edabf9d1eca4a9627697cb586de73833e4923a
---
gcc/config/loongarch/lasx.md | 36 +++
gcc/config/loongarch/lasxintrin.h | 156 +++++++++++
gcc/config/loongarch/loongarch-builtins.cc | 90 +++++-
gcc/config/loongarch/loongarch-c.cc | 1 +
gcc/config/loongarch/loongarch-ftypes.def | 12 +
gcc/doc/extend.texi | 258 +++++++++++++++++-
.../vector/lasx/vect-concat-128-256-result.c | 68 +++++
.../vector/lasx/vect-concat-128-256.c | 92 +++++++
.../vector/lasx/vect-extract-256-128-result.c | 69 +++++
.../vector/lasx/vect-extract-256-128.c | 86 ++++++
.../vector/lasx/vect-insert-128-256-result.c | 97 +++++++
.../vector/lasx/vect-insert-128-256.c | 95 +++++++
12 files changed, 1058 insertions(+), 2 deletions(-)
create mode 100644
gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-concat-128-256-result.c
create mode 100644
gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-concat-128-256.c
create mode 100644
gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128-result.c
create mode 100644
gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
create mode 100644
gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-insert-128-256-result.c
create mode 100644
gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-insert-128-256.c
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 3048c483408..80278d0fb7e 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -130,6 +130,7 @@ (define_mode_iterator LASX_D [V4DI V4DF])
;; Only used for splitting insert_d and copy_{u,s}.d.
(define_mode_iterator LASX_WD [V4DI V4DF V8SI V8SF])
+(define_mode_iterator LASX_PART [V4DI V4DF V8SF])
;; Only used for copy256_{u,s}.w.
(define_mode_iterator LASX_W [V8SI V8SF])
@@ -675,6 +676,41 @@ (define_insn "vec_extract_hi_<mode>"
[(set_attr "move_type" "fmove")
(set_attr "mode" "<MODE>")])
+;; vr0 -> low xr0
+;;
+(define_insn "vec_cast<mode>"
+ [(set (match_operand:LASX_PART 0 "register_operand" "=f")
+ (subreg:LASX_PART
+ (match_operand:<VHMODE256_ALL> 1 "register_operand" "0") 0))]
+ "ISA_HAS_LASX"
+ ""
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "vec_insert_lo_<mode>"
+ [(set (match_operand:LASX_PART 0 "register_operand" "=f")
+ (vec_concat:LASX_PART
+ (match_operand:<VHMODE256_ALL> 2 "register_operand" "f")
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX_PART 1 "register_operand" "0")
+ (match_operand:LASX_PART 3 "vect_par_cnst_high_half"))))]
+ "ISA_HAS_LASX"
+ "xvpermi.q\t%u0,%u2,0x30"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "vec_insert_hi_<mode>"
+ [(set (match_operand:LASX_PART 0 "register_operand" "=f")
+ (vec_concat:LASX_PART
+ (vec_select:<VHMODE256_ALL>
+ (match_operand:LASX_PART 1 "register_operand" "0")
+ (match_operand:LASX_PART 3 "vect_par_cnst_low_half"))
+ (match_operand:<VHMODE256_ALL> 2 "register_operand" "f")))]
+ "ISA_HAS_LASX"
+ "xvpermi.q\t%u0,%u2,0x02"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
(define_expand "vec_perm<mode>"
[(match_operand:LASX 0 "register_operand")
(match_operand:LASX 1 "register_operand")
diff --git a/gcc/config/loongarch/lasxintrin.h
b/gcc/config/loongarch/lasxintrin.h
index 6bcffc26d4a..6c34edeec25 100644
--- a/gcc/config/loongarch/lasxintrin.h
+++ b/gcc/config/loongarch/lasxintrin.h
@@ -23,6 +23,8 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
+#include <lsxintrin.h>
+
#ifndef _GCC_LOONGSON_ASXINTRIN_H
#define _GCC_LOONGSON_ASXINTRIN_H 1
@@ -5368,5 +5370,159 @@ __m256i __lasx_xvfcmp_sun_s (__m256 _1, __m256 _2)
#define __lasx_xvrepli_w(/*si10*/ _1) \
((__m256i)__builtin_lasx_xvrepli_w ((_1)))
+#if defined (__loongarch_asx_sx_conv)
+/* Add builtin interfaces for conversions between 128-bit and 256-bit
+   vectors.
+   For some of the following conversion functions, the assembly
+   instruction format described in the comments does not exactly match
+   the generated assembly instruction.
+   The Rust language front end selects built-in functions by analyzing
+   the assembly instruction format.  The data types in the instruction
+   templates are defined according to the function interfaces, in the
+   following order: output, input...  */
+/* Assembly instruction format: xd, vj. */
+/* Data types in instruction templates: V8SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256 __lasx_cast_128_s (__m128 _1)
+{
+ return (__m256)__builtin_lasx_cast_128_s ((v4f32)_1);
+}
+
+/* Assembly instruction format: xd, vj. */
+/* Data types in instruction templates: V4DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256d __lasx_cast_128_d (__m128d _1)
+{
+ return (__m256d)__builtin_lasx_cast_128_d ((v2f64)_1);
+}
+
+/* Assembly instruction format: xd, vj. */
+/* Data types in instruction templates: V4DI, V2DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256i __lasx_cast_128 (__m128i _1)
+{
+ return (__m256i)__builtin_lasx_cast_128 ((v2i64)_1);
+}
+
+/* Assembly instruction format: xd, vj, vk. */
+/* Data types in instruction templates: V8SF, V4SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256 __lasx_concat_128_s (__m128 _1, __m128 _2)
+{
+ return (__m256)__builtin_lasx_concat_128_s ((v4f32)_1, (v4f32)_2);
+}
+
+/* Assembly instruction format: xd, vj, vk. */
+/* Data types in instruction templates: V4DF, V2DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256d __lasx_concat_128_d (__m128d _1, __m128d _2)
+{
+ return (__m256d)__builtin_lasx_concat_128_d ((v2f64)_1, (v2f64)_2);
+}
+
+/* Assembly instruction format: xd, vj, vk. */
+/* Data types in instruction templates: V4DI, V2DI, V2DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256i __lasx_concat_128 (__m128i _1, __m128i _2)
+{
+ return (__m256i)__builtin_lasx_concat_128 ((v2i64)_1, (v2i64)_2);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V4SF, V8SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m128 __lasx_extract_128_lo_s (__m256 _1)
+{
+ return (__m128)__builtin_lasx_extract_128_lo_s ((v8f32)_1);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V4SF, V8SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m128 __lasx_extract_128_hi_s (__m256 _1)
+{
+ return (__m128)__builtin_lasx_extract_128_hi_s ((v8f32)_1);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V2DF, V4DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m128d __lasx_extract_128_lo_d (__m256d _1)
+{
+ return (__m128d)__builtin_lasx_extract_128_lo_d ((v4f64)_1);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V2DF, V4DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m128d __lasx_extract_128_hi_d (__m256d _1)
+{
+ return (__m128d)__builtin_lasx_extract_128_hi_d ((v4f64)_1);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V2DI, V4DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m128i __lasx_extract_128_lo (__m256i _1)
+{
+ return (__m128i)__builtin_lasx_extract_128_lo ((v4i64)_1);
+}
+
+/* Assembly instruction format: vd, xj. */
+/* Data types in instruction templates: V2DI, V4DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m128i __lasx_extract_128_hi (__m256i _1)
+{
+ return (__m128i)__builtin_lasx_extract_128_hi ((v4i64)_1);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V8SF, V8SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256 __lasx_insert_128_lo_s (__m256 _1, __m128 _2)
+{
+ return (__m256)__builtin_lasx_insert_128_lo_s ((v8f32)_1, (v4f32)_2);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V8SF, V8SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256 __lasx_insert_128_hi_s (__m256 _1, __m128 _2)
+{
+ return (__m256)__builtin_lasx_insert_128_hi_s ((v8f32)_1, (v4f32)_2);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V4DF, V4DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256d __lasx_insert_128_lo_d (__m256d _1, __m128d _2)
+{
+ return (__m256d)__builtin_lasx_insert_128_lo_d ((v4f64)_1, (v2f64)_2);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V4DF, V4DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256d __lasx_insert_128_hi_d (__m256d _1, __m128d _2)
+{
+ return (__m256d)__builtin_lasx_insert_128_hi_d ((v4f64)_1, (v2f64)_2);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V4DI, V4DI, V2DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256i __lasx_insert_128_lo (__m256i _1, __m128i _2)
+{
+ return (__m256i)__builtin_lasx_insert_128_lo ((v4i64)_1, (v2i64)_2);
+}
+
+/* Assembly instruction format: xd, xj, vk. */
+/* Data types in instruction templates: V4DI, V4DI, V2DI. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+__m256i __lasx_insert_128_hi (__m256i _1, __m128i _2)
+{
+ return (__m256i)__builtin_lasx_insert_128_hi ((v4i64)_1, (v2i64)_2);
+}
+
+#endif /* defined(__loongarch_asx_sx_conv). */
#endif /* defined(__loongarch_asx). */
#endif /* _GCC_LOONGSON_ASXINTRIN_H. */
diff --git a/gcc/config/loongarch/loongarch-builtins.cc
b/gcc/config/loongarch/loongarch-builtins.cc
index 9493dedcab7..312d87626a4 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -865,6 +865,27 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
#define CODE_FOR_lasx_xvmaddwod_q_du CODE_FOR_lasx_maddwod_q_du_punned
#define CODE_FOR_lasx_xvmaddwod_q_du_d CODE_FOR_lasx_maddwod_q_du_d_punned
+
+/* Add mutual conversion between 128 and 256 vectors. */
+#define CODE_FOR_lasx_extract_128_lo_s CODE_FOR_vec_extract_lo_v8sf
+#define CODE_FOR_lasx_extract_128_hi_s CODE_FOR_vec_extract_hi_v8sf
+#define CODE_FOR_lasx_extract_128_lo_d CODE_FOR_vec_extract_lo_v4df
+#define CODE_FOR_lasx_extract_128_hi_d CODE_FOR_vec_extract_hi_v4df
+#define CODE_FOR_lasx_extract_128_lo CODE_FOR_vec_extract_lo_v4di
+#define CODE_FOR_lasx_extract_128_hi CODE_FOR_vec_extract_hi_v4di
+#define CODE_FOR_lasx_insert_128_lo_s CODE_FOR_vec_insert_lo_v8sf
+#define CODE_FOR_lasx_insert_128_hi_s CODE_FOR_vec_insert_hi_v8sf
+#define CODE_FOR_lasx_insert_128_lo_d CODE_FOR_vec_insert_lo_v4df
+#define CODE_FOR_lasx_insert_128_hi_d CODE_FOR_vec_insert_hi_v4df
+#define CODE_FOR_lasx_insert_128_lo CODE_FOR_vec_insert_lo_v4di
+#define CODE_FOR_lasx_insert_128_hi CODE_FOR_vec_insert_hi_v4di
+#define CODE_FOR_lasx_concat_128_s CODE_FOR_vec_concatv8sf
+#define CODE_FOR_lasx_concat_128_d CODE_FOR_vec_concatv4df
+#define CODE_FOR_lasx_concat_128 CODE_FOR_vec_concatv4di
+#define CODE_FOR_lasx_cast_128_s CODE_FOR_vec_castv8sf
+#define CODE_FOR_lasx_cast_128_d CODE_FOR_vec_castv4df
+#define CODE_FOR_lasx_cast_128 CODE_FOR_vec_castv4di
+
static const struct loongarch_builtin_description loongarch_builtins[] = {
#define LARCH_MOVFCSR2GR 0
DIRECT_BUILTIN (movfcsr2gr, LARCH_USI_FTYPE_UQI, hard_float),
@@ -2407,7 +2428,25 @@ static const struct loongarch_builtin_description
loongarch_builtins[] = {
LASX_BUILTIN (xvssrarni_bu_h, LARCH_UV32QI_FTYPE_UV32QI_V32QI_USI),
LASX_BUILTIN (xvssrarni_hu_w, LARCH_UV16HI_FTYPE_UV16HI_V16HI_USI),
LASX_BUILTIN (xvssrarni_wu_d, LARCH_UV8SI_FTYPE_UV8SI_V8SI_USI),
- LASX_BUILTIN (xvssrarni_du_q, LARCH_UV4DI_FTYPE_UV4DI_V4DI_USI)
+ LASX_BUILTIN (xvssrarni_du_q, LARCH_UV4DI_FTYPE_UV4DI_V4DI_USI),
+ LASX_BUILTIN (extract_128_lo_s, LARCH_V4SF_FTYPE_V8SF),
+ LASX_BUILTIN (extract_128_hi_s, LARCH_V4SF_FTYPE_V8SF),
+ LASX_BUILTIN (extract_128_lo_d, LARCH_V2DF_FTYPE_V4DF),
+ LASX_BUILTIN (extract_128_hi_d, LARCH_V2DF_FTYPE_V4DF),
+ LASX_BUILTIN (extract_128_lo, LARCH_V2DI_FTYPE_V4DI),
+ LASX_BUILTIN (extract_128_hi, LARCH_V2DI_FTYPE_V4DI),
+ LASX_BUILTIN (insert_128_lo_s, LARCH_V8SF_FTYPE_V8SF_V4SF),
+ LASX_BUILTIN (insert_128_hi_s, LARCH_V8SF_FTYPE_V8SF_V4SF),
+ LASX_BUILTIN (insert_128_lo_d, LARCH_V4DF_FTYPE_V4DF_V2DF),
+ LASX_BUILTIN (insert_128_hi_d, LARCH_V4DF_FTYPE_V4DF_V2DF),
+ LASX_BUILTIN (insert_128_lo, LARCH_V4DI_FTYPE_V4DI_V2DI),
+ LASX_BUILTIN (insert_128_hi, LARCH_V4DI_FTYPE_V4DI_V2DI),
+ LASX_BUILTIN (concat_128_s, LARCH_V8SF_FTYPE_V4SF_V4SF),
+ LASX_BUILTIN (concat_128_d, LARCH_V4DF_FTYPE_V2DF_V2DF),
+ LASX_BUILTIN (concat_128, LARCH_V4DI_FTYPE_V2DI_V2DI),
+ LASX_BUILTIN (cast_128_s, LARCH_V8SF_FTYPE_V4SF),
+ LASX_BUILTIN (cast_128_d, LARCH_V4DF_FTYPE_V2DF),
+ LASX_BUILTIN (cast_128, LARCH_V4DI_FTYPE_V2DI)
};
/* Index I is the function declaration for loongarch_builtins[I], or null if
@@ -3001,6 +3040,10 @@ loongarch_expand_builtin_direct (enum insn_code icode,
rtx target, tree exp,
{
struct expand_operand ops[MAX_RECOG_OPERANDS];
int opno, argno;
+ /* For vector extraction/insertion operations, sel_high_p being true
+ indicates that the high of the data is selected/retained from the
+ vector register. */
+ bool sel_high_p = true;
/* Map any target to operand 0. */
opno = 0;
@@ -3019,6 +3062,51 @@ loongarch_expand_builtin_direct (enum insn_code icode,
rtx target, tree exp,
create_input_operand (&ops[1], CONST1_RTX (ops[0].mode), ops[0].mode);
return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p);
+ case CODE_FOR_vec_extract_lo_v8sf:
+ case CODE_FOR_vec_extract_lo_v4df:
+ case CODE_FOR_vec_extract_lo_v4di:
+ sel_high_p = false;
+ /* Fall through. */
+ case CODE_FOR_vec_extract_hi_v8sf:
+ case CODE_FOR_vec_extract_hi_v4df:
+ case CODE_FOR_vec_extract_hi_v4di:
+ {
+ /* The selection method for constructing the high/low half. */
+ loongarch_prepare_builtin_arg (&ops[1], exp, 0);
+ int nelts = GET_MODE_NUNITS (GET_MODE (ops[1].value));
+ int half_nelts = nelts / 2;
+ int base = sel_high_p ? half_nelts : 0;
+
+ rtx pat_rtx
+ = loongarch_gen_stepped_int_parallel (half_nelts, base, 1);
+ create_input_operand (&ops[2], pat_rtx, ops[1].mode);
+
+ return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p);
+ }
+
+ case CODE_FOR_vec_insert_hi_v8sf:
+ case CODE_FOR_vec_insert_hi_v4df:
+ case CODE_FOR_vec_insert_hi_v4di:
+ sel_high_p = false;
+ /* Fall through. */
+ case CODE_FOR_vec_insert_lo_v8sf:
+ case CODE_FOR_vec_insert_lo_v4df:
+ case CODE_FOR_vec_insert_lo_v4di:
+ {
+ /* The selection method for constructing the high/low half. */
+ loongarch_prepare_builtin_arg (&ops[1], exp, 0);
+ loongarch_prepare_builtin_arg (&ops[2], exp, 1);
+ int nelts = GET_MODE_NUNITS (GET_MODE (ops[1].value));
+ int half_nelts = nelts / 2;
+ int base = sel_high_p ? half_nelts : 0;
+
+ rtx pat_rtx
+ = loongarch_gen_stepped_int_parallel (half_nelts, base, 1);
+ create_input_operand (&ops[3], pat_rtx, ops[1].mode);
+
+ return loongarch_expand_builtin_insn (icode, 4, ops, has_target_p);
+ }
+
default:
break;
}
diff --git a/gcc/config/loongarch/loongarch-c.cc
b/gcc/config/loongarch/loongarch-c.cc
index effdcf0e255..fc031a6fe90 100644
--- a/gcc/config/loongarch/loongarch-c.cc
+++ b/gcc/config/loongarch/loongarch-c.cc
@@ -132,6 +132,7 @@ loongarch_update_cpp_builtins (cpp_reader *pfile)
loongarch_def_or_undef (ISA_HAS_LSX, "__loongarch_simd", pfile);
loongarch_def_or_undef (ISA_HAS_LSX, "__loongarch_sx", pfile);
loongarch_def_or_undef (ISA_HAS_LASX, "__loongarch_asx", pfile);
+ loongarch_def_or_undef (ISA_HAS_LASX, "__loongarch_asx_sx_conv", pfile);
builtin_undef ("__loongarch_simd_width");
if (ISA_HAS_LSX)
diff --git a/gcc/config/loongarch/loongarch-ftypes.def
b/gcc/config/loongarch/loongarch-ftypes.def
index 337f2c2c229..68b1b446182 100644
--- a/gcc/config/loongarch/loongarch-ftypes.def
+++ b/gcc/config/loongarch/loongarch-ftypes.def
@@ -42,6 +42,12 @@ DEF_LARCH_FTYPE (1, (USI, USI))
DEF_LARCH_FTYPE (1, (UDI, USI))
DEF_LARCH_FTYPE (1, (USI, UQI))
DEF_LARCH_FTYPE (1, (VOID, USI))
+DEF_LARCH_FTYPE (1, (V4SF, V8SF))
+DEF_LARCH_FTYPE (1, (V2DF, V4DF))
+DEF_LARCH_FTYPE (1, (V2DI, V4DI))
+DEF_LARCH_FTYPE (1, (V8SF, V4SF))
+DEF_LARCH_FTYPE (1, (V4DF, V2DF))
+DEF_LARCH_FTYPE (1, (V4DI, V2DI))
DEF_LARCH_FTYPE (2, (VOID, UQI, USI))
DEF_LARCH_FTYPE (2, (VOID, UHI, USI))
@@ -58,6 +64,12 @@ DEF_LARCH_FTYPE (2, (SI, SI, SI))
DEF_LARCH_FTYPE (2, (SI, DI, SI))
DEF_LARCH_FTYPE (2, (USI, USI, USI))
DEF_LARCH_FTYPE (2, (UDI, UDI, USI))
+DEF_LARCH_FTYPE (2, (V8SF, V4SF, V4SF))
+DEF_LARCH_FTYPE (2, (V4DF, V2DF, V2DF))
+DEF_LARCH_FTYPE (2, (V4DI, V2DI, V2DI))
+DEF_LARCH_FTYPE (2, (V8SF, V8SF, V4SF))
+DEF_LARCH_FTYPE (2, (V4DF, V4DF, V2DF))
+DEF_LARCH_FTYPE (2, (V4DI, V4DI, V2DI))
DEF_LARCH_FTYPE (3, (VOID, USI, USI, SI))
DEF_LARCH_FTYPE (3, (VOID, USI, UDI, SI))
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 882c0820a6e..ae7aaa8c85b 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -19688,7 +19688,16 @@ into the data cache. The instruction is issued in
slot I1@.
These built-in functions are available for LoongArch.
-Data Type Description:
+@menu
+* Data Types::
+* Directly-mapped Builtin Functions::
+* Directly-mapped Division Builtin Functions::
+* Other Builtin Functions::
+@end menu
+
+@node Data Types
+@subsubsection Data Types
+
@itemize
@item @code{imm0_31}, a compile-time constant in range 0 to 31;
@item @code{imm0_16383}, a compile-time constant in range 0 to 16383;
@@ -19696,6 +19705,9 @@ Data Type Description:
@item @code{imm_n2048_2047}, a compile-time constant in range -2048 to 2047;
@end itemize
+@node Directly-mapped Builtin Functions
+@subsubsection Directly-mapped Builtin Functions
+
The intrinsics provided are listed below:
@smallexample
unsigned int __builtin_loongarch_movfcsr2gr (imm0_31)
@@ -19819,6 +19831,9 @@ function you need to include @code{larchintrin.h}.
void __break (imm0_32767)
@end smallexample
+@node Directly-mapped Division Builtin Functions
+@subsubsection Directly-mapped Division Builtin Functions
+
These intrinsic functions are available by including @code{larchintrin.h} and
using @option{-mfrecipe}.
@smallexample
@@ -19828,6 +19843,9 @@ using @option{-mfrecipe}.
double __frsqrte_d (double);
@end smallexample
+@node Other Builtin Functions
+@subsubsection Other Builtin Functions
+
Additional built-in functions are available for LoongArch family
processors to efficiently use 128-bit floating-point (__float128)
values.
@@ -19854,6 +19872,15 @@ GCC provides intrinsics to access the LSX (Loongson
SIMD Extension) instructions
The interface is made available by including @code{<lsxintrin.h>} and using
@option{-mlsx}.
+@menu
+* SX Data Types::
+* Directly-mapped SX Builtin Functions::
+* Directly-mapped SX Division Builtin Functions::
+@end menu
+
+@node SX Data Types
+@subsubsection SX Data Types
+
The following vectors typedefs are included in @code{lsxintrin.h}:
@itemize
@@ -19881,6 +19908,9 @@ input/output values manipulated:
@item @code{imm_n2048_2047}, an integer literal in range -2048 to 2047.
@end itemize
+@node Directly-mapped SX Builtin Functions
+@subsubsection Directly-mapped SX Builtin Functions
+
For convenience, GCC defines functions @code{__lsx_vrepli_@{b/h/w/d@}} and
@code{__lsx_b[n]z_@{v/b/h/w/d@}}, which are implemented as follows:
@@ -20664,6 +20694,9 @@ __m128i __lsx_vxori_b (__m128i, imm0_255);
__m128i __lsx_vxor_v (__m128i, __m128i);
@end smallexample
+@node Directly-mapped SX Division Builtin Functions
+@subsubsection Directly-mapped SX Division Builtin Functions
+
These intrinsic functions are available by including @code{lsxintrin.h} and
using @option{-mfrecipe} and @option{-mlsx}.
@smallexample
@@ -20680,6 +20713,16 @@ GCC provides intrinsics to access the LASX (Loongson
Advanced SIMD Extension)
instructions. The interface is made available by including
@code{<lasxintrin.h>}
and using @option{-mlasx}.
+@menu
+* ASX Data Types::
+* Directly-mapped ASX Builtin Functions::
+* Directly-mapped ASX Division Builtin Functions::
+* Directly-mapped SX and ASX Conversion Builtin Functions::
+@end menu
+
+@node ASX Data Types
+@subsubsection ASX Data Types
+
The following vectors typedefs are included in @code{lasxintrin.h}:
@itemize
@@ -20708,6 +20751,9 @@ input/output values manipulated:
@item @code{imm_n2048_2047}, an integer literal in range -2048 to 2047.
@end itemize
+@node Directly-mapped ASX Builtin Functions
+@subsubsection Directly-mapped ASX Builtin Functions
+
For convenience, GCC defines functions @code{__lasx_xvrepli_@{b/h/w/d@}} and
@code{__lasx_b[n]z_@{v/b/h/w/d@}}, which are implemented as follows:
@@ -21512,6 +21558,9 @@ __m256i __lasx_xvxori_b (__m256i, imm0_255);
__m256i __lasx_xvxor_v (__m256i, __m256i);
@end smallexample
+@node Directly-mapped ASX Division Builtin Functions
+@subsubsection Directly-mapped ASX Division Builtin Functions
+
These intrinsic functions are available by including @code{lasxintrin.h} and
using @option{-mfrecipe} and @option{-mlasx}.
@smallexample
@@ -21521,6 +21570,213 @@ __m256d __lasx_xvfrsqrte_d (__m256d);
__m256 __lasx_xvfrsqrte_s (__m256);
@end smallexample
+@node Directly-mapped SX and ASX Conversion Builtin Functions
+@subsubsection Directly-mapped SX and ASX Conversion Builtin Functions
+
+For convenience, @code{lasxintrin.h} now includes @code{lsxintrin.h}, and
+18 new interface functions have been added for conversions between 128-bit
+and 256-bit vectors.  They are made available by the @option{-mlasx} option.
+@smallexample
+__m256 __lasx_cast_128_s (__m128);
+__m256d __lasx_cast_128_d (__m128d);
+__m256i __lasx_cast_128 (__m128i);
+__m256 __lasx_concat_128_s (__m128, __m128);
+__m256d __lasx_concat_128_d (__m128d, __m128d);
+__m256i __lasx_concat_128 (__m128i, __m128i);
+__m128 __lasx_extract_128_lo_s (__m256);
+__m128 __lasx_extract_128_hi_s (__m256);
+__m128d __lasx_extract_128_lo_d (__m256d);
+__m128d __lasx_extract_128_hi_d (__m256d);
+__m128i __lasx_extract_128_lo (__m256i);
+__m128i __lasx_extract_128_hi (__m256i);
+__m256 __lasx_insert_128_lo_s (__m256, __m128);
+__m256 __lasx_insert_128_hi_s (__m256, __m128);
+__m256d __lasx_insert_128_lo_d (__m256d, __m128d);
+__m256d __lasx_insert_128_hi_d (__m256d, __m128d);
+__m256i __lasx_insert_128_lo (__m256i, __m128i);
+__m256i __lasx_insert_128_hi (__m256i, __m128i);
+@end smallexample
+
+When GCC does not support these interfaces for conversions between 128-bit
+and 256-bit vectors, the following code can be used as an equivalent
+substitute.
+
+@smallexample
+
+ #ifndef __loongarch_asx_sx_conv
+
+ #include <lasxintrin.h>
+ #include <lsxintrin.h>
+ __m256 inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_cast_128_s (__m128 src)
+ @{
+ __m256 dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m256d inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_cast_128_d (__m128d src)
+ @{
+ __m256d dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m256i inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_cast_128 (__m128i src)
+ @{
+ __m256i dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m256 inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_concat_128_s (__m128 src1, __m128 src2)
+ @{
+ __m256 dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256d inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_concat_128_d (__m128d src1, __m128d src2)
+ @{
+ __m256d dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256i inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_concat_128 (__m128i src1, __m128i src2)
+ @{
+ __m256i dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m128 inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_extract_128_lo_s (__m256 src)
+ @{
+ __m128 dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m128d inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_extract_128_lo_d (__m256d src)
+ @{
+ __m128d dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m128i inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_extract_128_lo (__m256i src)
+ @{
+ __m128i dest;
+ asm ("" : "=f"(dest) : "0"(src));
+ return dest;
+ @}
+
+ __m128 inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_extract_128_hi_s (__m256 src)
+ @{
+ __m128 dest;
+ asm ("xvpermi.d %u0,%u1,0xe\n"
+ : "=f"(dest)
+ : "f"(src));
+ return dest;
+ @}
+
+ __m128d inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_extract_128_hi_d (__m256d src)
+ @{
+ __m128d dest;
+ asm ("xvpermi.d %u0,%u1,0xe\n"
+ : "=f"(dest)
+ : "f"(src));
+ return dest;
+ @}
+
+ __m128i inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_extract_128_hi (__m256i src)
+ @{
+ __m128i dest;
+ asm ("xvpermi.d %u0,%u1,0xe\n"
+ : "=f"(dest)
+ : "f"(src));
+ return dest;
+ @}
+
+ __m256 inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_insert_128_lo_s (__m256 src1, __m128 src2)
+ @{
+ __m256 dest;
+ asm ("xvpermi.q %u0,%u2,0x30\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256d inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_insert_128_lo_d (__m256d src1, __m128d src2)
+ @{
+ __m256d dest;
+ asm ("xvpermi.q %u0,%u2,0x30\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256i inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_insert_128_lo (__m256i src1, __m128i src2)
+ @{
+ __m256i dest;
+ asm ("xvpermi.q %u0,%u2,0x30\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256 inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_insert_128_hi_s (__m256 src1, __m128 src2)
+ @{
+ __m256 dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256d inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_insert_128_hi_d (__m256d src1, __m128d src2)
+ @{
+ __m256d dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+
+ __m256i inline __attribute__((__gnu_inline__, __always_inline__,
__artificial__))
+ __lasx_insert_128_hi (__m256i src1, __m128i src2)
+ @{
+ __m256i dest;
+ asm ("xvpermi.q %u0,%u2,0x02\n"
+ : "=f"(dest)
+ : "0"(src1), "f"(src2));
+ return dest;
+ @}
+ #endif
+
+@end smallexample
+
@node MIPS DSP Built-in Functions
@subsection MIPS DSP Built-in Functions
diff --git
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-concat-128-256-result.c
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-concat-128-256-result.c
new file mode 100644
index 00000000000..e876c4a0b1a
--- /dev/null
+++
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-concat-128-256-result.c
@@ -0,0 +1,68 @@
+/* { dg-options "-mabi=lp64d -O2 -mlasx -w -fno-strict-aliasing" } */
+
+#include "../simd_correctness_check.h"
+#include <lasxintrin.h>
+
+int
+main ()
+{
+ __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+ __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+ __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+ __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result;
+ __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result;
+ __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result;
+
+ //__m128_op0={1,2,3,4},__m128_op1={5,6,7,8};
+ *((int *)&__m128_op0[3]) = 0x40800000;
+ *((int *)&__m128_op0[2]) = 0x40400000;
+ *((int *)&__m128_op0[1]) = 0x40000000;
+ *((int *)&__m128_op0[0]) = 0x3f800000;
+ *((int *)&__m128_op1[3]) = 0x41000000;
+ *((int *)&__m128_op1[2]) = 0x40e00000;
+ *((int *)&__m128_op1[1]) = 0x40c00000;
+ *((int *)&__m128_op1[0]) = 0x40a00000;
+ *((int *)&__m256_result[7]) = 0x41000000;
+ *((int *)&__m256_result[6]) = 0x40e00000;
+ *((int *)&__m256_result[5]) = 0x40c00000;
+ *((int *)&__m256_result[4]) = 0x40a00000;
+ *((int *)&__m256_result[3]) = 0x40800000;
+ *((int *)&__m256_result[2]) = 0x40400000;
+ *((int *)&__m256_result[1]) = 0x40000000;
+ *((int *)&__m256_result[0]) = 0x3f800000;
+ __m256_out = __lasx_concat_128_s (__m128_op0, __m128_op1);
+ ASSERTEQ_32 (__LINE__, __m256_result, __m256_out);
+ __m256_out = __lasx_cast_128_s (__m128_op0);
+ ASSERTEQ_32 (__LINE__, __m256_out, __m128_op0);
+
+ //__m128i_op0={1,2},__m128i_op1={3,4};
+ *((unsigned long *)&__m128i_op0[1]) = 0x2;
+ *((unsigned long *)&__m128i_op0[0]) = 0x1;
+ *((unsigned long *)&__m128i_op1[1]) = 0x4;
+ *((unsigned long *)&__m128i_op1[0]) = 0x3;
+ *((unsigned long *)&__m256i_result[3]) = 0x4;
+ *((unsigned long *)&__m256i_result[2]) = 0x3;
+ *((unsigned long *)&__m256i_result[1]) = 0x2;
+ *((unsigned long *)&__m256i_result[0]) = 0x1;
+ __m256i_out = __lasx_concat_128 (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
+ __m256i_out = __lasx_cast_128 (__m128i_op0);
+ ASSERTEQ_64 (__LINE__, __m256i_out, __m128i_op0);
+
+ //__m128d_op0={1,2},__m128i_op1={3,4};
+ *((unsigned long *)&__m128d_op0[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m128d_op0[0]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128d_op1[1]) = 0x4010000000000000;
+ *((unsigned long *)&__m128d_op1[0]) = 0x4008000000000000;
+ *((unsigned long *)&__m256d_result[3]) = 0x4010000000000000;
+ *((unsigned long *)&__m256d_result[2]) = 0x4008000000000000;
+ *((unsigned long *)&__m256d_result[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m256d_result[0]) = 0x3ff0000000000000;
+ __m256d_out = __lasx_concat_128_d (__m128d_op0, __m128d_op1);
+ ASSERTEQ_64 (__LINE__, __m256d_result, __m256d_out);
+ __m256d_out = __lasx_cast_128_d (__m128d_op0);
+ ASSERTEQ_64 (__LINE__, __m256d_out, __m128d_op0);
+
+ return 0;
+}
diff --git
a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-concat-128-256.c
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-concat-128-256.c
new file mode 100644
index 00000000000..5d8cbb2e68f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-concat-128-256.c
@@ -0,0 +1,92 @@
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-mabi=lp64d -O2 -mlasx" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <lasxintrin.h>
+
+/*
+**foo1:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** vinsgr2vr.d (\$vr[0-9]+),\$r8,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256
+foo1 (__m128 x, __m128 y)
+{
+ return __builtin_lasx_concat_128_s (x, y);
+}
+
+/*
+**foo2:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** vinsgr2vr.d (\$vr[0-9]+),\$r8,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256d
+foo2 (__m128d x, __m128d y)
+{
+ return __builtin_lasx_concat_128_d (x, y);
+}
+
+/*
+**foo3:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** vinsgr2vr.d (\$vr[0-9]+),\$r8,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256i
+foo3 (__m128i x, __m128i y)
+{
+ return __builtin_lasx_concat_128 (x, y);
+}
+
+/*
+**foo4:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256
+foo4 (__m128 x)
+{
+ return __builtin_lasx_cast_128_s (x);
+}
+
+/*
+**foo5:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256d
+foo5 (__m128d x)
+{
+ return __builtin_lasx_cast_128_d (x);
+}
+
+/*
+**foo6:
+** vinsgr2vr.d (\$vr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,1
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256i
+foo6 (__m128i x)
+{
+ return __builtin_lasx_cast_128 (x);
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128-result.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128-result.c
new file mode 100644
index 00000000000..61064d6e1b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128-result.c
@@ -0,0 +1,69 @@
+/* { dg-options "-mabi=lp64d -O2 -mlasx -w -fno-strict-aliasing" } */
+
+#include "../simd_correctness_check.h"
+#include <lasxintrin.h>
+
+extern void abort (void);
+int
+main ()
+{
+ __m128i __m128i_result0, __m128i_result1, __m128i_out, __m128i_result;
+ __m128 __m128_result0, __m128_result1, __m128_out, __m128_result;
+ __m128d __m128d_result0, __m128d_result1, __m128d_out, __m128d_result;
+
+ __m256i __m256i_op0, __m256i_op1, __m256i_op2, __m256i_out, __m256i_result;
+ __m256 __m256_op0, __m256_op1, __m256_op2, __m256_out, __m256_result;
+ __m256d __m256d_op0, __m256d_op1, __m256d_op2, __m256d_out, __m256d_result;
+
+ //__m256_op0 = {1,2,3,4,5,6,7,8};
+ *((int *)&__m256_op0[7]) = 0x41000000;
+ *((int *)&__m256_op0[6]) = 0x40e00000;
+ *((int *)&__m256_op0[5]) = 0x40c00000;
+ *((int *)&__m256_op0[4]) = 0x40a00000;
+ *((int *)&__m256_op0[3]) = 0x40800000;
+ *((int *)&__m256_op0[2]) = 0x40400000;
+ *((int *)&__m256_op0[1]) = 0x40000000;
+ *((int *)&__m256_op0[0]) = 0x3f800000;
+ *((int *)&__m128_result1[3]) = 0x41000000;
+ *((int *)&__m128_result1[2]) = 0x40e00000;
+ *((int *)&__m128_result1[1]) = 0x40c00000;
+ *((int *)&__m128_result1[0]) = 0x40a00000;
+ *((int *)&__m128_result0[3]) = 0x40800000;
+ *((int *)&__m128_result0[2]) = 0x40400000;
+ *((int *)&__m128_result0[1]) = 0x40000000;
+ *((int *)&__m128_result0[0]) = 0x3f800000;
+ __m128_out = __lasx_extract_128_lo_s (__m256_op0);
+ ASSERTEQ_32 (__LINE__, __m128_result0, __m128_out);
+ __m128_out = __lasx_extract_128_hi_s (__m256_op0);
+ ASSERTEQ_32 (__LINE__, __m128_result1, __m128_out);
+
+ //__m256i_op0 = {1,2,3,4};
+ *((unsigned long *)&__m256i_op0[3]) = 0x4;
+ *((unsigned long *)&__m256i_op0[2]) = 0x3;
+ *((unsigned long *)&__m256i_op0[1]) = 0x2;
+ *((unsigned long *)&__m256i_op0[0]) = 0x1;
+ *((unsigned long *)&__m128i_result0[1]) = 0x2;
+ *((unsigned long *)&__m128i_result0[0]) = 0x1;
+ *((unsigned long *)&__m128i_result1[1]) = 0x4;
+ *((unsigned long *)&__m128i_result1[0]) = 0x3;
+ __m128i_out = __lasx_extract_128_lo (__m256i_op0);
+ ASSERTEQ_64 (__LINE__, __m128i_result0, __m128i_out);
+ __m128i_out = __lasx_extract_128_hi (__m256i_op0);
+ ASSERTEQ_64 (__LINE__, __m128i_result1, __m128i_out);
+
+ //__m256d_op0 = {1,2,3,4};
+ *((unsigned long *)&__m256d_op0[3]) = 0x4010000000000000;
+ *((unsigned long *)&__m256d_op0[2]) = 0x4008000000000000;
+ *((unsigned long *)&__m256d_op0[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m256d_op0[0]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128d_result0[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m128d_result0[0]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128d_result1[1]) = 0x4010000000000000;
+ *((unsigned long *)&__m128d_result1[0]) = 0x4008000000000000;
+ __m128d_out = __lasx_extract_128_lo_d (__m256d_op0);
+ ASSERTEQ_64 (__LINE__, __m128d_result0, __m128d_out);
+ __m128d_out = __lasx_extract_128_hi_d (__m256d_op0);
+ ASSERTEQ_64 (__LINE__, __m128d_result1, __m128d_out);
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
new file mode 100644
index 00000000000..d2219ea82de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-extract-256-128.c
@@ -0,0 +1,86 @@
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-mabi=lp64d -O2 -mlasx" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <lasxintrin.h>
+
+/*
+**foo1_lo:
+** vld (\$vr[0-9]+),\$r4,0
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128
+foo1_lo (__m256 x)
+{
+ return __lasx_extract_128_lo_s (x);
+}
+
+/*
+**foo1_hi:
+** xvld (\$xr[0-9]+),\$r4,0
+** xvpermi.d (\$xr[0-9]+),(\$xr[0-9]+),0xe
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128
+foo1_hi (__m256 x)
+{
+ return __lasx_extract_128_hi_s (x);
+}
+
+/*
+**foo2_lo:
+** vld (\$vr[0-9]+),\$r4,0
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128d
+foo2_lo (__m256d x)
+{
+ return __lasx_extract_128_lo_d (x);
+}
+
+/*
+**foo2_hi:
+** xvld (\$xr[0-9]+),\$r4,0
+** xvpermi.d (\$xr[0-9]+),(\$xr[0-9]+),0xe
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128d
+foo2_hi (__m256d x)
+{
+ return __lasx_extract_128_hi_d (x);
+}
+
+/*
+**foo3_lo:
+** vld (\$vr[0-9]+),\$r4,0
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128i
+foo3_lo (__m256i x)
+{
+ return __lasx_extract_128_lo (x);
+}
+
+/*
+**foo3_hi:
+** xvld (\$xr[0-9]+),\$r4,0
+** xvpermi.d (\$xr[0-9]+),(\$xr[0-9]+),0xe
+** vpickve2gr.du \$r4,(\$vr[0-9]+),0
+** vpickve2gr.du \$r5,(\$vr[0-9]+),1
+** jr \$r1
+*/
+__m128i
+foo3_hi (__m256i x)
+{
+ return __lasx_extract_128_hi (x);
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-insert-128-256-result.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-insert-128-256-result.c
new file mode 100644
index 00000000000..ce5abf94f4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-insert-128-256-result.c
@@ -0,0 +1,97 @@
+/* { dg-options "-mabi=lp64d -O2 -mlasx -w -fno-strict-aliasing" } */
+
+#include "../simd_correctness_check.h"
+#include <lasxintrin.h>
+
+extern void abort (void);
+int
+main ()
+{
+ __m128i __m128i_op0, __m128i_op1, __m128i_out;
+ __m128 __m128_op0, __m128_op1, __m128_out;
+ __m128d __m128d_op0, __m128d_op1, __m128d_out;
+
+ __m256i __m256i_op0, __m256i_result0, __m256i_result1, __m256i_out;
+ __m256 __m256_op0, __m256_result0, __m256_result1, __m256_out;
+ __m256d __m256d_op0, __m256d_result0, __m256d_result1, __m256d_out;
+
+ //__m256_op0 = {1,2,3,4,5,6,7,8}, __m128_op0 ={9,9,9,9};
+ *((int *)&__m256_op0[7]) = 0x41000000;
+ *((int *)&__m256_op0[6]) = 0x40e00000;
+ *((int *)&__m256_op0[5]) = 0x40c00000;
+ *((int *)&__m256_op0[4]) = 0x40a00000;
+ *((int *)&__m256_op0[3]) = 0x40800000;
+ *((int *)&__m256_op0[2]) = 0x40400000;
+ *((int *)&__m256_op0[1]) = 0x40000000;
+ *((int *)&__m256_op0[0]) = 0x3f800000;
+ *((int *)&__m128_op0[3]) = 0x41100000;
+ *((int *)&__m128_op0[2]) = 0x41100000;
+ *((int *)&__m128_op0[1]) = 0x41100000;
+ *((int *)&__m128_op0[0]) = 0x41100000;
+ *((int *)&__m256_result0[7]) = 0x41000000;
+ *((int *)&__m256_result0[6]) = 0x40e00000;
+ *((int *)&__m256_result0[5]) = 0x40c00000;
+ *((int *)&__m256_result0[4]) = 0x40a00000;
+ *((int *)&__m256_result0[3]) = 0x41100000;
+ *((int *)&__m256_result0[2]) = 0x41100000;
+ *((int *)&__m256_result0[1]) = 0x41100000;
+ *((int *)&__m256_result0[0]) = 0x41100000;
+ *((int *)&__m256_result1[7]) = 0x41100000;
+ *((int *)&__m256_result1[6]) = 0x41100000;
+ *((int *)&__m256_result1[5]) = 0x41100000;
+ *((int *)&__m256_result1[4]) = 0x41100000;
+ *((int *)&__m256_result1[3]) = 0x40800000;
+ *((int *)&__m256_result1[2]) = 0x40400000;
+ *((int *)&__m256_result1[1]) = 0x40000000;
+ *((int *)&__m256_result1[0]) = 0x3f800000;
+ __m256_out = __lasx_insert_128_lo_s (__m256_op0, __m128_op0);
+ ASSERTEQ_32 (__LINE__, __m256_result0, __m256_out);
+ __m256_out = __lasx_insert_128_hi_s (__m256_op0, __m128_op0);
+ ASSERTEQ_32 (__LINE__, __m256_result1, __m256_out);
+
+ //__m256i_op0 ={1,2,3,4},__m128i_op0={5,6},__m128i_op1={7,8};
+ *((unsigned long *)&__m256i_op0[3]) = 0x4;
+ *((unsigned long *)&__m256i_op0[2]) = 0x3;
+ *((unsigned long *)&__m256i_op0[1]) = 0x2;
+ *((unsigned long *)&__m256i_op0[0]) = 0x1;
+ *((unsigned long *)&__m128i_op0[1]) = 0x6;
+ *((unsigned long *)&__m128i_op0[0]) = 0x5;
+ *((unsigned long *)&__m128i_op1[1]) = 0x8;
+ *((unsigned long *)&__m128i_op1[0]) = 0x7;
+ *((unsigned long *)&__m256i_result0[3]) = 0x4;
+ *((unsigned long *)&__m256i_result0[2]) = 0x3;
+ *((unsigned long *)&__m256i_result0[1]) = 0x6;
+ *((unsigned long *)&__m256i_result0[0]) = 0x5;
+ *((unsigned long *)&__m256i_result1[3]) = 0x8;
+ *((unsigned long *)&__m256i_result1[2]) = 0x7;
+ *((unsigned long *)&__m256i_result1[1]) = 0x2;
+ *((unsigned long *)&__m256i_result1[0]) = 0x1;
+ __m256i_out = __lasx_insert_128_lo (__m256i_op0, __m128i_op0);
+ ASSERTEQ_64 (__LINE__, __m256i_result0, __m256i_out);
+ __m256i_out = __lasx_insert_128_hi (__m256i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m256i_result1, __m256i_out);
+
+ //__m256d_op0 ={1,2,3,4},__m128d_op0={5,6},__m128d_op1={7,8};
+ *((unsigned long *)&__m256d_op0[3]) = 0x4010000000000000;
+ *((unsigned long *)&__m256d_op0[2]) = 0x4008000000000000;
+ *((unsigned long *)&__m256d_op0[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m256d_op0[0]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128d_op0[1]) = 0x4018000000000000;
+ *((unsigned long *)&__m128d_op0[0]) = 0x4014000000000000;
+ *((unsigned long *)&__m128d_op1[1]) = 0x4020000000000000;
+ *((unsigned long *)&__m128d_op1[0]) = 0x401c000000000000;
+ *((unsigned long *)&__m256d_result0[3]) = 0x4010000000000000;
+ *((unsigned long *)&__m256d_result0[2]) = 0x4008000000000000;
+ *((unsigned long *)&__m256d_result0[1]) = 0x4018000000000000;
+ *((unsigned long *)&__m256d_result0[0]) = 0x4014000000000000;
+ *((unsigned long *)&__m256d_result1[3]) = 0x4020000000000000;
+ *((unsigned long *)&__m256d_result1[2]) = 0x401c000000000000;
+ *((unsigned long *)&__m256d_result1[1]) = 0x4000000000000000;
+ *((unsigned long *)&__m256d_result1[0]) = 0x3ff0000000000000;
+ __m256d_out = __lasx_insert_128_lo_d (__m256d_op0, __m128d_op0);
+ ASSERTEQ_64 (__LINE__, __m256d_result0, __m256d_out);
+ __m256d_out = __lasx_insert_128_hi_d (__m256d_op0, __m128d_op1);
+ ASSERTEQ_64 (__LINE__, __m256d_result1, __m256d_out);
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-insert-128-256.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-insert-128-256.c
new file mode 100644
index 00000000000..326c855f229
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/vect-insert-128-256.c
@@ -0,0 +1,95 @@
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-mabi=lp64d -O2 -mlasx" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <lasxintrin.h>
+
+/*
+**foo1:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x30
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256
+foo1 (__m256 x, __m128 y)
+{
+ return __builtin_lasx_insert_128_lo_s (x, y);
+}
+
+/*
+**foo2:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256
+foo2 (__m256 x, __m128 y)
+{
+ return __builtin_lasx_insert_128_hi_s (x, y);
+}
+
+/*
+**foo3:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x30
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256d
+foo3 (__m256d x, __m128d y)
+{
+ return __builtin_lasx_insert_128_lo_d (x, y);
+}
+
+/*
+**foo4:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256d
+foo4 (__m256d x, __m128d y)
+{
+ return __builtin_lasx_insert_128_hi_d (x, y);
+}
+
+/*
+**foo5:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x30
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256i
+foo5 (__m256i x, __m128i y)
+{
+ return __builtin_lasx_insert_128_lo (x, y);
+}
+
+/*
+**foo6:
+** vinsgr2vr.d (\$vr[0-9]+),\$r6,0
+** xvld (\$xr[0-9]+),\$r5,0
+** vinsgr2vr.d (\$vr[0-9]+),\$r7,1
+** xvpermi.q (\$xr[0-9]+),(\$xr[0-9]+),0x02
+** xvst (\$xr[0-9]+),\$r4,0
+** jr \$r1
+*/
+__m256i
+foo6 (__m256i x, __m128i y)
+{
+ return __builtin_lasx_insert_128_hi (x, y);
+}
--
2.20.1