[gcc r16-4902] LoongArch: Implement the vector dot product operation with quadruple width.

LuluCheng via Gcc-cvs Thu, 30 Oct 2025 18:09:30 -0700

https://gcc.gnu.org/g:4b1a27a7ed19872855d9ec93c0d2edc3d8272269


commit r16-4902-g4b1a27a7ed19872855d9ec93c0d2edc3d8272269
Author: Lulu Cheng <[email protected]>
Date:   Thu Oct 23 17:14:19 2025 +0800

    LoongArch: Implement the vector dot product operation with quadruple width.
    
    The logic for the vector dot product operation, where the destination
    elements are quadruple the width of the source elements, is as
    follows (taking sdot_prodv4siv16qi as an example):
    
    v16i8 src1, src2;
    v4i32 src3, dest;
    
    dest[0] = src1[0] * src2[0] + src1[1] * src2[1]
              + src1[2] * src2[2] + src1[3] * src2[3]
              + src3[0]
    dest[1] = src1[4] * src2[4] + src1[5] * src2[5]
              + src1[6] * src2[6] + src1[7] * src2[7]
              + src3[1]
    dest[2] = src1[8] * src2[8] + src1[9] * src2[9]
              + src1[10] * src2[10] + src1[11] * src2[11]
              + src3[2]
    dest[3] = src1[12] * src2[12] + src1[13] * src2[13]
              + src1[14] * src2[14] + src1[15] * src2[15]
              + src3[3]
    
    gcc/ChangeLog:
    
            * config/loongarch/lasx.md (ILASX_HB): Move to ...
            * config/loongarch/lsx.md (ILSX_HB): Move to ...
            * config/loongarch/simd.md (ILSX_HB): ... here.
            (ILASX_HB): ... here.
            (IVEC_HB): New iterator.
            (WVEC_QUARTER): New attr.
            (wvec_quarter): Likewise.
            (simdfmt_qw): Likewise.
            (<su>dot_prod<wvec_quarter><mode>): New template.

Diff:
---
 gcc/config/loongarch/lasx.md |  3 ---
 gcc/config/loongarch/lsx.md  |  3 ---
 gcc/config/loongarch/simd.md | 53 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index efb0841aecf2..f053f436e96e 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -146,9 +146,6 @@
 ;; Only integer modes equal or larger than a word.
 (define_mode_iterator ILASX_DW  [V4DI V8SI])
 
-;; Only integer modes smaller than a word.
-(define_mode_iterator ILASX_HB  [V16HI V32QI])
-
 ;; Only used for immediate set shuffle elements instruction.
 (define_mode_iterator LASX_WHB_W [V8SI V16HI V32QI V8SF])
 
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index 62a8a4bb3301..8ac17aee7ee8 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -145,9 +145,6 @@
 ;; Only integer modes equal or larger than a word.
 (define_mode_iterator ILSX_DW  [V2DI V4SI])
 
-;; Only integer modes smaller than a word.
-(define_mode_iterator ILSX_HB  [V8HI V16QI])
-
 ;;;; Only integer modes for fixed-point madd_q/maddr_q.
 ;;(define_mode_iterator ILSX_WH  [V4SI V8HI])
 
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 2cea82e0abf9..b73f65aca13a 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -23,6 +23,10 @@
 ;; Integer modes supported by LASX.
 (define_mode_iterator ILASX   [V4DI V8SI V16HI V32QI])
 
+;; Only integer modes smaller than a word.
+(define_mode_iterator ILSX_HB  [V8HI V16QI])
+(define_mode_iterator ILASX_HB  [V16HI V32QI])
+
 ;; FP modes supported by LSX
 (define_mode_iterator FLSX    [V2DF V4SF])
 
@@ -38,6 +42,10 @@
 ;; All integer modes available
 (define_mode_iterator IVEC    [(ILSX "ISA_HAS_LSX") (ILASX "ISA_HAS_LASX")])
 
+;; All integer modes smaller than a word.
+(define_mode_iterator IVEC_HB [(ILSX_HB "ISA_HAS_LSX")
+                              (ILASX_HB "ISA_HAS_LASX")])
+
 ;; All FP modes available
 (define_mode_iterator FVEC    [(FLSX "ISA_HAS_LSX") (FLASX "ISA_HAS_LASX")])
 
@@ -90,12 +98,18 @@
                             (V8HI "V4SI") (V16HI "V8SI")
                             (V16QI "V8HI") (V32QI "V16HI")])
 
+(define_mode_attr WVEC_QUARTER [(V8HI "V2DI") (V16HI "V4DI")
+                               (V16QI "V4SI") (V32QI "V8SI")])
+
 ;; Lower-case version.
 (define_mode_attr wvec_half [(V2DI "v1ti") (V4DI "v2ti")
                             (V4SI "v2di") (V8SI "v4di")
                             (V8HI "v4si") (V16HI "v8si")
                             (V16QI "v8hi") (V32QI "v16hi")])
 
+(define_mode_attr wvec_quarter [(V8HI "v2di") (V16HI "v4di")
+                               (V16QI "v4si") (V32QI "v8si")])
+
 ;; Integer vector modes with the same length and unit size as a mode.
 (define_mode_attr VIMODE [(V2DI "V2DI") (V4SI "V4SI")
                          (V8HI "V8HI") (V16QI "V16QI")
@@ -124,12 +138,16 @@
                           (V8HI "h") (V16HI "h")
                           (V16QI "b") (V32QI "b")])
 
-;; Suffix for widening LSX or LASX instructions.
+;; Suffix for double widening LSX or LASX instructions.
 (define_mode_attr simdfmt_w [(V2DI "q") (V4DI "q")
                             (V4SI "d") (V8SI "d")
                             (V8HI "w") (V16HI "w")
                             (V16QI "h") (V32QI "h")])
 
+;; Suffix for quadruple widening LSX or LASX instructions.
+(define_mode_attr simdfmt_qw [(V8HI "d") (V16HI "d")
+                            (V16QI "w") (V32QI "w")])
+
 ;; Suffix for integer mode in LSX or LASX instructions with FP input but
 ;; integer output.
 (define_mode_attr simdifmt_for_f [(V2DF "l") (V4DF "l")
@@ -839,6 +857,39 @@
   DONE;
 })
 
+(define_expand "<su>dot_prod<wvec_quarter><mode>"
+ [(match_operand:<WVEC_QUARTER> 0 "register_operand" "=f,f")
+  (match_operand:IVEC_HB 1 "register_operand" "f,f")
+  (match_operand:IVEC_HB 2 "register_operand" "f,f")
+  (match_operand:<WVEC_QUARTER> 3 "reg_or_0_operand" "f, YG")
+  (any_extend (const_int 0))]
+  ""
+{
+  rtx *op = operands;
+  rtx res_mulev = gen_reg_rtx (<WVEC_HALF>mode);
+  rtx res_mulod = gen_reg_rtx (<WVEC_HALF>mode);
+  rtx res_addev = gen_reg_rtx (<WVEC_QUARTER>mode);
+  rtx res_addod = gen_reg_rtx (<WVEC_QUARTER>mode);
+  emit_insn (gen_<simd_isa>_<x>vmulwev_<simdfmt_w>_<simdfmt><u>
+             (res_mulev, op[1], op[2]));
+  emit_insn (gen_<simd_isa>_<x>vmulwod_<simdfmt_w>_<simdfmt><u>
+             (res_mulod, op[1], op[2]));
+  emit_insn (gen_<simd_isa>_<x>vhaddw_<simdfmt_qw><u>_<simdfmt_w><u>
+              (res_addev, res_mulev, res_mulev));
+  emit_insn (gen_<simd_isa>_<x>vhaddw_<simdfmt_qw><u>_<simdfmt_w><u>
+             (res_addod, res_mulod, res_mulod));
+  if (op[3] == CONST0_RTX (<WVEC_QUARTER>mode))
+    emit_insn (gen_add<wvec_quarter>3 (op[0], res_addev,
+                                      res_addod));
+  else
+    {
+      emit_insn (gen_add<wvec_quarter>3 (res_addev, res_addev,
+                                        res_addod));
+      emit_insn (gen_add<wvec_quarter>3 (op[0], res_addev, op[3]));
+    }
+  DONE;
+})
+
 (define_insn "simd_maddw_evod_<mode>_hetero"
   [(set (match_operand:<WVEC_HALF> 0 "register_operand" "=f")
        (plus:<WVEC_HALF>

[gcc r16-4902] LoongArch: Implement the vector dot product operation with quadruple width.

Reply via email to