Hi, Jiahao:

I measured the latencies of the two instructions involved here (andi and bstrpick.w), and they are the same on both the 3A5000 and the 3A6000.

The claim that andi is faster therefore needs to be confirmed again.

On 2024/1/5 at 3:37 PM, Jiahao Xu wrote:
For zero_extendqisi2 and zero_extendqidi2, use andi instead of bstrpick.w,
because andi is 6 times faster than bstrpick.w.

gcc/ChangeLog:

        * config/loongarch/loongarch.md:
        (zero_extend<SHORT:mode><GPR:mode>2): Rename to ..
        (zero_extendhi<GPR:mode>2): .. this, use hi.
        (zero_extendqihi2): Rename to ..
        (zero_extendqi<HWD:mode>2): .. this, and extend to HWD.
        (*zero_extend<GPR:mode>_trunc<SHORT:mode>): Rename to ..
        (*zero_extend<GPR:mode>_trunchi): .. this, use hi.
        (*zero_extendhi_truncqi): Rename to ..
        (*zero_extend<HWD:mode>_truncqi): .. this, and extend to HWD.

gcc/testsuite/ChangeLog:

        * gcc.target/loongarch/zeroextend-qi.c: New test.

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index d1f5b94f5d6..843dee77a60 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -397,6 +397,9 @@
  ;; Likewise the 64-bit truncate-and-shift patterns.
  (define_mode_iterator SUBDI [QI HI SI])
+;; Scalar fixed point modes but excludes QI.
+(define_mode_iterator HWD [HI SI (DI "TARGET_64BIT")])
+
  ;; Iterator for scalar fixed point modes.
  (define_mode_iterator QHWD [QI HI SI (DI "TARGET_64BIT")])
@@ -1659,48 +1662,48 @@
    [(set_attr "move_type" "arith,load,load,load")
     (set_attr "mode" "DI")])
-(define_insn "zero_extend<SHORT:mode><GPR:mode>2"
+(define_insn "zero_extendhi<GPR:mode>2"
    [(set (match_operand:GPR 0 "register_operand" "=r,r,r")
        (zero_extend:GPR
-            (match_operand:SHORT 1 "nonimmediate_operand" "r,m,k")))]
+            (match_operand:HI 1 "nonimmediate_operand" "r,m,k")))]
    ""
    "@
-   bstrpick.w\t%0,%1,<SHORT:7_or_15>,0
-   ld.<SHORT:size>u\t%0,%1
-   ldx.<SHORT:size>u\t%0,%1"
+   bstrpick.w\t%0,%1,15,0
+   ld.hu\t%0,%1
+   ldx.hu\t%0,%1"
    [(set_attr "move_type" "pick_ins,load,load")
     (set_attr "mode" "<GPR:MODE>")])
-(define_insn "zero_extendqihi2"
-  [(set (match_operand:HI 0 "register_operand" "=r,r,r")
-       (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))]
+(define_insn "zero_extendqi<HWD:mode>2"
+  [(set (match_operand:HWD 0 "register_operand" "=r,r,r")
+       (zero_extend:HWD (match_operand:QI 1 "nonimmediate_operand" "r,k,m")))]
    ""
    "@
     andi\t%0,%1,0xff
     ldx.bu\t%0,%1
     ld.bu\t%0,%1"
    [(set_attr "move_type" "andi,load,load")
-   (set_attr "mode" "HI")])
+   (set_attr "mode" "<HWD:MODE>")])
  ;; Combiner patterns to optimize truncate/zero_extend combinations.
-(define_insn "*zero_extend<GPR:mode>_trunc<SHORT:mode>"
+(define_insn "*zero_extend<GPR:mode>_trunchi"
    [(set (match_operand:GPR 0 "register_operand" "=r")
        (zero_extend:GPR
-           (truncate:SHORT (match_operand:DI 1 "register_operand" "r"))))]
+           (truncate:HI (match_operand:DI 1 "register_operand" "r"))))]
    "TARGET_64BIT"
-  "bstrpick.w\t%0,%1,<SHORT:7_or_15>,0"
+  "bstrpick.w\t%0,%1,15,0"
    [(set_attr "move_type" "pick_ins")
     (set_attr "mode" "<GPR:MODE>")])
-(define_insn "*zero_extendhi_truncqi"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-       (zero_extend:HI
+(define_insn "*zero_extend<HWD:mode>_truncqi"
+  [(set (match_operand:HWD 0 "register_operand" "=r")
+       (zero_extend:HWD
            (truncate:QI (match_operand:DI 1 "register_operand" "r"))))]
    "TARGET_64BIT"
    "andi\t%0,%1,0xff"
    [(set_attr "alu_type" "and")
-   (set_attr "mode" "HI")])
+   (set_attr "mode" "<HWD:MODE>")])
  
  ;;
  ;;  ....................
diff --git a/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c
new file mode 100644
index 00000000000..1da8cdad2ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/zeroextend-qi.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler "andi" } } */
+
+#include <stdint.h>
+
+uint8_t
+foo (uint64_t a, uint8_t b)
+{
+  return a + b;
+}

Reply via email to