[PATCH V3 3/3] RISC-V: Add C intrinsic for Scalar Bitmanip Extension

2023-12-25 Thread Liao Shihua
This patch adds C intrinsics for Bitmanip Extension.
RISCV_BUILTIN_NO_PREFIX is a new riscv_builtin_description like RISCV_BUILTIN.
But it uses CODE_FOR_##INSN rather than CODE_FOR_riscv_##INSN.
Changed the mode of the orcb, clmul, and brev8 patterns from X to GPR because
orcbsi, clmul_si, and brev8_si are all available on both rv32 and rv64. They
are tested in scalar_bitmanip_intrinsic-64-emulated.c.

gcc/ChangeLog:

* config.gcc: Include riscv_bitmanip.h.
* config/riscv/bitmanip.md: Changed mode from X to GPR in orcb and 
clmul patterns.
* config/riscv/crypto.md: Changed mode from X to GPR in brev8 pattern.
* config/riscv/riscv-builtins.cc (AVAIL): New AVAIL.
(RISCV_BUILTIN_NO_PREFIX): New riscv_builtin_description.
* config/riscv/riscv-cmo.def (RISCV_BUILTIN): New builtins.
* config/riscv/riscv-ftypes.def (2): New ftypes.
* config/riscv/riscv-scalar-crypto.def (RISCV_BUILTIN): New builtins.
(RISCV_BUILTIN_NO_PREFIX): Ditto.
* config/riscv/riscv_bitmanip.h: New file.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/scalar_bitmanip_intrinsic-32.c: New test.
* gcc.target/riscv/scalar_bitmanip_intrinsic-64-emulated.c: New test.
* gcc.target/riscv/scalar_bitmanip_intrinsic-64.c: New test.

---
 gcc/config.gcc|   2 +-
 gcc/config/riscv/bitmanip.md  |  10 +-
 gcc/config/riscv/crypto.md|   4 +-
 gcc/config/riscv/riscv-builtins.cc|  22 ++
 gcc/config/riscv/riscv-cmo.def|  12 +-
 gcc/config/riscv/riscv-ftypes.def |   2 +
 gcc/config/riscv/riscv-scalar-crypto.def  |  22 +-
 gcc/config/riscv/riscv_bitmanip.h | 297 ++
 .../riscv/scalar_bitmanip_intrinsic-32.c  |  96 ++
 .../scalar_bitmanip_intrinsic-64-emulated.c   |  32 ++
 .../riscv/scalar_bitmanip_intrinsic-64.c  | 114 +++
 11 files changed, 597 insertions(+), 16 deletions(-)
 create mode 100644 gcc/config/riscv/riscv_bitmanip.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/scalar_bitmanip_intrinsic-32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/scalar_bitmanip_intrinsic-64-emulated.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/scalar_bitmanip_intrinsic-64.c

diff --git a/gcc/config.gcc b/gcc/config.gcc
index f8483d34ee9..5999ef5cbc8 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -549,7 +549,7 @@ riscv*)
extra_objs="${extra_objs} riscv-vector-builtins.o 
riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
extra_objs="${extra_objs} thead.o riscv-target-attr.o"
d_target_objs="riscv-d.o"
-   extra_headers="riscv_vector.h riscv_crypto.h"
+   extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.cc"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.h"
;;
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 92bcdc30fe4..23a06514732 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -443,8 +443,8 @@
 ;; orc.b (or-combine) is added as an unspec for the benefit of the support
 ;; for optimized string functions (such as strcmp).
 (define_insn "orcb<mode>2"
-  [(set (match_operand:X 0 "register_operand" "=r")
-   (unspec:X [(match_operand:X 1 "register_operand" "r")] UNSPEC_ORC_B))]
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (unspec:GPR [(match_operand:GPR 1 "register_operand" "r")] 
UNSPEC_ORC_B))]
   "TARGET_ZBB"
   "orc.b\t%0,%1"
   [(set_attr "type" "bitmanip")])
@@ -852,9 +852,9 @@
 
 ;; ZBKC or ZBC extension
 (define_insn "riscv_clmul_<mode>"
-  [(set (match_operand:X 0 "register_operand" "=r")
-(unspec:X [(match_operand:X 1 "register_operand" "r")
-  (match_operand:X 2 "register_operand" "r")]
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+(unspec:GPR [(match_operand:GPR 1 "register_operand" "r")
+  (match_operand:GPR 2 "register_operand" "r")]
   UNSPEC_CLMUL))]
   "TARGET_ZBKC || TARGET_ZBC"
   "clmul\t%0,%1,%2"
diff --git a/gcc/config/riscv/crypto.md b/gcc/config/riscv/crypto.md
index 2b65fadeb15..bf3d1cd9a3c 100644
--- a/gcc/config/riscv/crypto.md
+++ b/gcc/config/riscv/crypto.md
@@ -72,8 +72,8 @@
 
 ;; ZBKB extension
 (define_insn "riscv_brev8_<mode>"
-  [(set (match_operand:X 0 "register_operand" "=r")
-(unspec:X [(match_operand:X 1 "register_operand" "r")]
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+(unspec:GPR [(match_operand:GPR 1 "register_operand" "r")]
   UNSPEC_BREV8))]
   "TARGET_ZBKB"
   "brev8\t%0,%1"
diff --git a/gcc/config/riscv/riscv-builtins.cc 
b/gcc/config/riscv/riscv-builtins.cc
index 5ee11ebe3bc..fc6ff548b83 100644
--- a/gcc/config/riscv/riscv-builtins.cc
+++ b/gcc/config/riscv/riscv-builtins.cc
@@ -105,6 +105,7 @@ AVAIL (zero32,  TARG

[PATCH V3 2/3] RISC-V: Add C intrinsic for Scalar Crypto Extension

2023-12-25 Thread Liao Shihua
This patch adds C intrinsics for Scalar Crypto Extension.

gcc/ChangeLog:

* config.gcc: Include riscv_crypto.h.
* config/riscv/riscv_crypto.h: New file.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/scalar_crypto_intrinsic-32.c: New test.
* gcc.target/riscv/scalar_crypto_intrinsic-64.c: New test.

---
 gcc/config.gcc|   2 +-
 gcc/config/riscv/riscv_crypto.h   | 309 ++
 .../riscv/scalar_crypto_intrinsic-32.c| 114 +++
 .../riscv/scalar_crypto_intrinsic-64.c| 122 +++
 4 files changed, 546 insertions(+), 1 deletion(-)
 create mode 100644 gcc/config/riscv/riscv_crypto.h
 create mode 100644 gcc/testsuite/gcc.target/riscv/scalar_crypto_intrinsic-32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/scalar_crypto_intrinsic-64.c

diff --git a/gcc/config.gcc b/gcc/config.gcc
index f0676c830e8..f8483d34ee9 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -549,7 +549,7 @@ riscv*)
extra_objs="${extra_objs} riscv-vector-builtins.o 
riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
extra_objs="${extra_objs} thead.o riscv-target-attr.o"
d_target_objs="riscv-d.o"
-   extra_headers="riscv_vector.h"
+   extra_headers="riscv_vector.h riscv_crypto.h"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.cc"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.h"
;;
diff --git a/gcc/config/riscv/riscv_crypto.h b/gcc/config/riscv/riscv_crypto.h
new file mode 100644
index 000..14ccc24c98d
--- /dev/null
+++ b/gcc/config/riscv/riscv_crypto.h
@@ -0,0 +1,309 @@
+/* RISC-V 'Scalar Crypto' Extension intrinsics include file.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef __RISCV_SCALAR_CRYPTO_H
+#define __RISCV_SCALAR_CRYPTO_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined (__riscv_zknd)
+
+#if __riscv_xlen == 32
+
+#ifdef __OPTIMIZE__
+
+extern __inline uint32_t
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_aes32dsi (uint32_t rs1, uint32_t rs2, const int bs)
+{
+  return __builtin_riscv_aes32dsi (rs1,rs2,bs);
+}
+
+extern __inline uint32_t
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_aes32dsmi (uint32_t rs1, uint32_t rs2, const int bs)
+{
+  return __builtin_riscv_aes32dsmi (rs1,rs2,bs);
+}
+
+#else
+#define __riscv_aes32dsi(x, y, bs) __builtin_riscv_aes32dsi (x, y, bs)
+#define __riscv_aes32dsmi(x, y, bs) __builtin_riscv_aes32dsmi (x, y, bs)
+#endif
+
+#endif
+
+#if __riscv_xlen == 64
+
+extern __inline uint64_t
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_aes64ds (uint64_t rs1, uint64_t rs2)
+{
+  return __builtin_riscv_aes64ds (rs1,rs2);
+}
+
+extern __inline uint64_t
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_aes64dsm (uint64_t rs1, uint64_t rs2)
+{
+  return __builtin_riscv_aes64dsm (rs1,rs2);
+}
+
+extern __inline uint64_t
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_aes64im (uint64_t rs1)
+{
+  return __builtin_riscv_aes64im (rs1);
+}
+#endif
+#endif // __riscv_zknd
+
+#if (defined (__riscv_zknd) || defined (__riscv_zkne)) && (__riscv_xlen == 64)
+
+#ifdef __OPTIMIZE__
+
+extern __inline uint64_t
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_aes64ks1i (uint64_t rs1, const int rnum)
+{
+  return __builtin_riscv_aes64ks1i (rs1,rnum);
+}
+
+#else
+#define __riscv_aes64ks1i(x, rnum) __builtin_riscv_aes64ks1i (x, rnum)
+#endif
+
+extern __inline uint64_t
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_aes64ks2 (uint64_t rs1, uint64_t rs2)
+{
+return __builtin_riscv_aes64ks2 (rs1,rs2);
+}
+
+#endif // __riscv_zknd || __riscv_zkne
+
+#if defined (__riscv_zkne)
+
+#if __riscv_xlen == 32
+
+#ifdef __OPTIMIZE__
+
+extern __inline uint32_t
+__attribute__ ((__gnu_inline__, __alwa

[PATCH V3 0/3] RISC-V: Add intrinsics for Bitmanip and Scalar Crypto extensions

2023-12-25 Thread Liao Shihua
Update v2 -> v3:
  1. Change pattern mode from X to GPR in orcb, clmul, and brev8.
  2. Add emulated testsuite.
  3. Removed duplicate testsuite between built-in and intrinsic. 
  4. Typo fix.

Update v1 -> v2:
  1. Rename *_intrinsic-* to *_intrinsic-XLEN.
  2. Typo fix.
  3. Intrinsics with immediate arguments will use macros at -O0.

It's a small patch series that just provides a mapping from the RV intrinsics
to the builtin names within GCC.

Liao Shihua (3):
  RISC-V: Remove the Scalar Bitmanip and Crypto Built-In function
testsuites
  RISC-V: Add C intrinsic for Scalar Crypto Extension
  RISC-V: Add C intrinsic for Scalar Bitmanip Extension

 gcc/config.gcc|   2 +-
 gcc/config/riscv/bitmanip.md  |  10 +-
 gcc/config/riscv/crypto.md|   4 +-
 gcc/config/riscv/riscv-builtins.cc|  22 ++
 gcc/config/riscv/riscv-cmo.def|  12 +-
 gcc/config/riscv/riscv-ftypes.def |   2 +
 gcc/config/riscv/riscv-scalar-crypto.def  |  22 +-
 gcc/config/riscv/riscv_bitmanip.h | 297 +
 gcc/config/riscv/riscv_crypto.h   | 309 ++
 .../riscv/scalar_bitmanip_intrinsic-32.c  |  96 ++
 .../scalar_bitmanip_intrinsic-64-emulated.c   |  32 ++
 .../riscv/scalar_bitmanip_intrinsic-64.c  | 114 +++
 .../riscv/scalar_crypto_intrinsic-32.c| 114 +++
 .../riscv/scalar_crypto_intrinsic-64.c| 122 +++
 gcc/testsuite/gcc.target/riscv/zbbw.c |  26 --
 gcc/testsuite/gcc.target/riscv/zbc32.c|  23 --
 gcc/testsuite/gcc.target/riscv/zbc64.c|  23 --
 gcc/testsuite/gcc.target/riscv/zbkb32.c   |  18 -
 gcc/testsuite/gcc.target/riscv/zbkb64.c   |   5 -
 gcc/testsuite/gcc.target/riscv/zbkc32.c   |  17 -
 gcc/testsuite/gcc.target/riscv/zbkc64.c   |  17 -
 gcc/testsuite/gcc.target/riscv/zbkx32.c   |  18 -
 gcc/testsuite/gcc.target/riscv/zbkx64.c   |  18 -
 gcc/testsuite/gcc.target/riscv/zknd32-2.c |  28 --
 gcc/testsuite/gcc.target/riscv/zknd64-2.c |  42 ---
 gcc/testsuite/gcc.target/riscv/zkne32-2.c |  28 --
 gcc/testsuite/gcc.target/riscv/zkne64-2.c |  34 --
 .../gcc.target/riscv/zknh-sha256-32.c |  10 -
 .../gcc.target/riscv/zknh-sha256-64.c |  28 --
 .../gcc.target/riscv/zknh-sha512-32.c |  42 ---
 .../gcc.target/riscv/zknh-sha512-64.c |  31 --
 gcc/testsuite/gcc.target/riscv/zksed32-2.c|  29 --
 gcc/testsuite/gcc.target/riscv/zksed64-2.c|  29 --
 gcc/testsuite/gcc.target/riscv/zksh32.c   |  19 --
 gcc/testsuite/gcc.target/riscv/zksh64.c   |  19 --
 35 files changed, 1142 insertions(+), 520 deletions(-)
 create mode 100644 gcc/config/riscv/riscv_bitmanip.h
 create mode 100644 gcc/config/riscv/riscv_crypto.h
 create mode 100644 
gcc/testsuite/gcc.target/riscv/scalar_bitmanip_intrinsic-32.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/scalar_bitmanip_intrinsic-64-emulated.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/scalar_bitmanip_intrinsic-64.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/scalar_crypto_intrinsic-32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/scalar_crypto_intrinsic-64.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbbw.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbc32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbc64.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbkc32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbkc64.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbkx32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbkx64.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknd32-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknd64-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zkne32-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zkne64-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha256-32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha256-64.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-64.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zksed32-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zksed64-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zksh32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zksh64.c

-- 
2.34.1



[PATCH V3 1/3] RISC-V: Remove the Scalar Bitmanip and Crypto Built-In function testsuites

2023-12-25 Thread Liao Shihua
This patch series provides a mapping from the RV intrinsics to the builtin 
names.
There are some duplicate tests between the intrinsic and built-in function
testsuites.
Remove the Scalar Bitmanip and Scalar Crypto built-in function tests 
that will be covered by the intrinsic function tests.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zbkb32.c: Remove 
__builtin_riscv_(un)zip,__builtin_riscv_brev8.
* gcc.target/riscv/zbkb64.c: Remove __builtin_riscv_brev8.
* gcc.target/riscv/zbbw.c: Removed.
* gcc.target/riscv/zbc32.c: Removed.
* gcc.target/riscv/zbc64.c: Removed.
* gcc.target/riscv/zbkc32.c: Removed.
* gcc.target/riscv/zbkc64.c: Removed.
* gcc.target/riscv/zbkx32.c: Removed.
* gcc.target/riscv/zbkx64.c: Removed.
* gcc.target/riscv/zknd32-2.c: Removed.
* gcc.target/riscv/zknd64-2.c: Removed.
* gcc.target/riscv/zkne32-2.c: Removed.
* gcc.target/riscv/zkne64-2.c: Removed.
* gcc.target/riscv/zknh-sha256-32.c: Removed.
* gcc.target/riscv/zknh-sha256-64.c: Removed.
* gcc.target/riscv/zknh-sha512-32.c: Removed.
* gcc.target/riscv/zknh-sha512-64.c: Removed.
* gcc.target/riscv/zksed32-2.c: Removed.
* gcc.target/riscv/zksed64-2.c: Removed.
* gcc.target/riscv/zksh32.c: Removed.
* gcc.target/riscv/zksh64.c: Removed.

---
 gcc/testsuite/gcc.target/riscv/zbbw.c | 26 
 gcc/testsuite/gcc.target/riscv/zbc32.c| 23 --
 gcc/testsuite/gcc.target/riscv/zbc64.c| 23 --
 gcc/testsuite/gcc.target/riscv/zbkb32.c   | 18 
 gcc/testsuite/gcc.target/riscv/zbkb64.c   |  5 ---
 gcc/testsuite/gcc.target/riscv/zbkc32.c   | 17 
 gcc/testsuite/gcc.target/riscv/zbkc64.c   | 17 
 gcc/testsuite/gcc.target/riscv/zbkx32.c   | 18 
 gcc/testsuite/gcc.target/riscv/zbkx64.c   | 18 
 gcc/testsuite/gcc.target/riscv/zknd32-2.c | 28 -
 gcc/testsuite/gcc.target/riscv/zknd64-2.c | 42 ---
 gcc/testsuite/gcc.target/riscv/zkne32-2.c | 28 -
 gcc/testsuite/gcc.target/riscv/zkne64-2.c | 34 ---
 .../gcc.target/riscv/zknh-sha256-32.c | 10 -
 .../gcc.target/riscv/zknh-sha256-64.c | 28 -
 .../gcc.target/riscv/zknh-sha512-32.c | 42 ---
 .../gcc.target/riscv/zknh-sha512-64.c | 31 --
 gcc/testsuite/gcc.target/riscv/zksed32-2.c| 29 -
 gcc/testsuite/gcc.target/riscv/zksed64-2.c| 29 -
 gcc/testsuite/gcc.target/riscv/zksh32.c   | 19 -
 gcc/testsuite/gcc.target/riscv/zksh64.c   | 19 -
 21 files changed, 504 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbbw.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbc32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbc64.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbkc32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbkc64.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbkx32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zbkx64.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknd32-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknd64-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zkne32-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zkne64-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha256-32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha256-64.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zknh-sha512-64.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zksed32-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zksed64-2.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zksh32.c
 delete mode 100644 gcc/testsuite/gcc.target/riscv/zksh64.c

diff --git a/gcc/testsuite/gcc.target/riscv/zbbw.c 
b/gcc/testsuite/gcc.target/riscv/zbbw.c
deleted file mode 100644
index bdf6b0c4ec5..000
--- a/gcc/testsuite/gcc.target/riscv/zbbw.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */
-
-int
-clz (int i)
-{
-  return __builtin_clz (i);
-}
-
-int
-ctz (int i)
-{
-  return __builtin_ctz (i);
-}
-
-int
-popcount (int i)
-{
-  return __builtin_popcount (i);
-}
-
-
-/* { dg-final { scan-assembler-times {\mclzw} 1 } } */
-/* { dg-final { scan-assembler-times {\mctzw} 1 } } */
-/* { dg-final { scan-assembler-times {\mcpopw} 1 } } */
-/* { dg-final { scan-assembler-not "andi\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbc32.c 
b/gcc/testsuite/gcc.target/riscv/zbc32.c
deleted file mode 100644
index 049ea95c56b..000
--- a/gcc/testsuite/gcc.target/riscv/zbc32.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -march=rv32gc_zbc -mabi=ilp32" } */
-/* { dg-skip-if "" { *-*-* } { "-

GuangZhou ShenZhen to Jakarta Semarang Surabaya from VITA|global logistics

2023-12-25 Thread Eva
Your reader does not support HTML
Dear 
Good day~This is Eva from VITA Company.
VITA offer full air,sea and road freight service from China main ports and 
area,focused on cross-border logistics and transportation service between China 
and Indonesia since its establishment
Our range of service extends to include
1)FCL/LCL Sea Shipping
2)Direct and consolidated air freight
3)Warehousing & transloading
4)Customs clearance and documentation
5)DDU&DDP
6)SGS Service
If you have any inquiries you are planning at the moment, you can reach out to 
me


[PATCH v2] LoongArch: Replace -mexplicit-relocs=auto simple-used address peephole2 with combine

2023-12-25 Thread Xi Ruoyao
The problem with peephole2 is it uses a naive sliding-window algorithm
and misses many cases.  For example:

float a[1];
float t() { return a[0] + a[8000]; }

is compiled to:

la.local$r13,a
la.local$r12,a+32768
fld.s   $f1,$r13,0
fld.s   $f0,$r12,-768
fadd.s  $f0,$f1,$f0

by trunk.  But as we've explained in r14-4851, the following would be
better with -mexplicit-relocs=auto:

pcalau12i   $r13,%pc_hi20(a)
pcalau12i   $r12,%pc_hi20(a+32000)
fld.s   $f1,$r13,%pc_lo12(a)
fld.s   $f0,$r12,%pc_lo12(a+32000)
fadd.s  $f0,$f1,$f0

However, the sliding-window algorithm just won't detect the pcalau12i/fld
pair to be optimized.  Using a define_insn_and_split in the combine pass
works around the issue.

gcc/ChangeLog:

* config/loongarch/loongarch.md:
(simple_load): New
define_insn_and_split.
(simple_load_off): Likewise.
(simple_load_ext): Likewise.
(simple_load_offext):
Likewise.
(simple_store): Likewise.
(simple_store_off): Likewise.
(define_peephole2): Remove la.local/[f]ld peepholes.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c:
New test.
---

Change from [v1]:
- Add "&& true" as the split condition [as suggested][1].

[v1]:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640280.html
[1]:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/641407.html

Bootstrapped and regtested on loongarch64-linux-gnu (on top of
r14-6829).  Ok for trunk?

 gcc/config/loongarch/loongarch.md | 165 +-
 ...explicit-relocs-auto-single-load-store-2.c |  11 ++
 2 files changed, 98 insertions(+), 78 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 7021105b241..18a2d05325b 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -4123,101 +4123,110 @@ (define_insn "loongarch_crcc_w__w"
 ;;
 ;; And if the pseudo op cannot be relaxed, we'll get a worse result (with
 ;; 3 instructions).
-(define_peephole2
-  [(set (match_operand:P 0 "register_operand")
-   (match_operand:P 1 "symbolic_pcrel_operand"))
-   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
-   (mem:LD_AT_LEAST_32_BIT (match_dup 0)))]
-  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
-   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
-   && (peep2_reg_dead_p (2, operands[0]) \
-   || REGNO (operands[0]) == REGNO (operands[2]))"
-  [(set (match_dup 2)
-   (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1]
+(define_insn_and_split "simple_load"
+  [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f")
+   (mem:LD_AT_LEAST_32_BIT
+ (match_operand:P 1 "symbolic_pcrel_operand" "")))]
+  "loongarch_pre_reload_split () \
+   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
+  "#"
+  "&& true"
+  [(set (match_dup 0)
+   (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 2) (match_dup 1]
   {
-emit_insn (gen_pcalau12i_gr (operands[0], operands[1]));
+operands[2] = gen_reg_rtx (Pmode);
+emit_insn (gen_pcalau12i_gr (operands[2], operands[1]));
   })
 
-(define_peephole2
-  [(set (match_operand:P 0 "register_operand")
-   (match_operand:P 1 "symbolic_pcrel_operand"))
-   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
-   (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0)
-   (match_operand 3 "const_int_operand"]
-  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
-   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
-   && (peep2_reg_dead_p (2, operands[0]) \
-   || REGNO (operands[0]) == REGNO (operands[2]))"
-  [(set (match_dup 2)
-   (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1]
+(define_insn_and_split "simple_load_off"
+  [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f")
+   (mem:LD_AT_LEAST_32_BIT
+ (plus (match_operand:P 1 "symbolic_pcrel_operand" "")
+   (match_operand 2 "const_int_operand" ""]
+  "loongarch_pre_reload_split () \
+   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
+   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
+  "#"
+  "&& true"
+  [(set (match_dup 0)
+   (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 2) (match_dup 1]
   {
-operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
-emit_insn (gen_pcalau12i_gr (operands[0], operands[1]));
+HOST_WIDE_INT offset = INTVAL (operands[2]);
+operands[2] = gen_reg_rtx (Pmode);
+operands[1] = plus_constant (Pmode, operands[1], offset);
+emit_insn (gen_pcalau12i_gr (operands[2], operands[1]));
   })
 
-(define_peephole2
-  [(set (match_operand:P 0 "register_ope

Re: [PATCH] RISC-V: Move RVV V_REGS liveness computation into analyze_loop_vinfo

2023-12-25 Thread Kito Cheng
LGTM :)

On Mon, Dec 25, 2023 at 5:18 PM Juzhe-Zhong  wrote:
>
> Currently, we compute RVV V_REGS liveness during better_main_loop_than_p, 
> which is not an appropriate
> time to do so: for example, when the code will finally pick the 
> LMUL = 8 vectorization
> factor, we compute liveness for LMUL = 8 multiple times, which is redundant.
>
> Since we leverage the current ARM SVE cost model:
>
>   /* Do one-time initialization based on the vinfo.  */
>   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
>   if (!m_analyzed_vinfo)
> {
>   if (loop_vinfo)
> analyze_loop_vinfo (loop_vinfo);
>
>   m_analyzed_vinfo = true;
> }
>
> Analyze COST model only once for each cost model.
>
> So here we move dynamic LMUL liveness information into analyze_loop_vinfo.
>
> /* Do one-time initialization of the costs given that we're
>costing the loop vectorization described by LOOP_VINFO.  */
> void
> costs::analyze_loop_vinfo (loop_vec_info loop_vinfo)
> {
>   ...
>
>   /* Detect whether the LOOP has unexpected spills.  */
>   record_potential_unexpected_spills (loop_vinfo);
> }
>
> So that we can avoid redundant computations and the current dynamic LMUL cost 
> model flow is much
> more reasonable and consistent with others.
>
> Tested on RV32 and RV64 no regressions.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-costs.cc (compute_estimated_lmul): Allow 
> fractional vector.
> (preferred_new_lmul_p): Move RVV V_REGS liveness computation into 
> analyze_loop_vinfo.
> (has_unexpected_spills_p): New function.
> (costs::record_potential_unexpected_spills): Ditto.
> (costs::better_main_loop_than_p): Move RVV V_REGS liveness 
> computation into analyze_loop_vinfo.
> * config/riscv/riscv-vector-costs.h: New functions and variables.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul-mixed-1.c: Robustify 
> test.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-1.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-2.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-3.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-4.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-5.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-6.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-7.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-1.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-2.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-3.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-4.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-5.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-6.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-1.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-2.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-7.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-1.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-10.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-2.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-3.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-4.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-5.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-6.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-7.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-8.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/pr111848.c: Ditto.
> * gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c: Ditto.
>
> ---
>  gcc/config/riscv/riscv-vector-costs.cc| 110 +++---
>  gcc/config/riscv/riscv-vector-costs.h |   8 ++
>  .../riscv/rvv/dynamic-lmul-mixed-1.c  |   5 +-
>  .../costmodel/riscv/rvv/dynamic-lmul1-1.c |   5 +-
>  .../costmodel/riscv/rvv/dynamic-lmul1-2.c |   5 +-
>  .../costmodel/riscv/rvv/dynamic-lmul1-3.c |   5 +-
>  .../costmodel/riscv/rvv/dynamic-lmul1-4.c |   5 +-
>  .../costmodel/riscv/rvv/dynamic-lmul1-5.c |   5 +-
>  .../costmodel/riscv/rvv/dynamic-lmul1-6.c |   5 +-
>  .../costmodel/riscv/rvv/dynamic-lmul1-7.c |   5 +-
>  

[PATCH][committed]middle-end: explicitly initialize vec_stmts [PR113132]

2023-12-25 Thread Tamar Christina
Hi All,

when configured with --enable-checking=release we get a false
positive on the use of vec_stmts as the compiler seems unable
to notice it gets initialized through the pass-by-reference.

This explicitly initializes the local.

Bootstrapped Regtested on x86_64-pc-linux-gnu and no issues.

Committed under the obvious rule.

Thanks,
Tamar

gcc/ChangeLog:

PR bootstrap/113132
* tree-vect-loop.cc (vect_create_epilog_for_reduction): Initialize 
vec_stmts;

--- inline copy of patch -- 
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 
88261a3a4f57d5e2124939b069b0e92c57d9abba..f51ae3e719e753059389cf9495b6d65b3b1191cb
 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6207,7 +6207,7 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
   exit_bb = loop_exit->dest;
   exit_gsi = gsi_after_labels (exit_bb);
   reduc_inputs.create (slp_node ? vec_num : ncopies);
-  vec <gimple *> vec_stmts;
+  vec <gimple *> vec_stmts = vNULL;
   for (unsigned i = 0; i < vec_num; i++)
 {
   gimple_seq stmts = NULL;




-- 
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 
88261a3a4f57d5e2124939b069b0e92c57d9abba..f51ae3e719e753059389cf9495b6d65b3b1191cb
 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6207,7 +6207,7 @@ vect_create_epilog_for_reduction (loop_vec_info 
loop_vinfo,
   exit_bb = loop_exit->dest;
   exit_gsi = gsi_after_labels (exit_bb);
   reduc_inputs.create (slp_node ? vec_num : ncopies);
-  vec  vec_stmts;
+  vec  vec_stmts = vNULL;
   for (unsigned i = 0; i < vec_num; i++)
 {
   gimple_seq stmts = NULL;





[r14-6822 Regression] FAIL: 25_algorithms/partition_point/constrained.cc -std=gnu++26 execution test on Linux/x86_64

2023-12-25 Thread haochen.jiang
On Linux/x86_64,

01f4251b8775c832a92d55e2df57c9ac72eaceef is the first bad commit
commit 01f4251b8775c832a92d55e2df57c9ac72eaceef
Author: Tamar Christina 
Date:   Sun Dec 24 19:18:12 2023 +

middle-end: Support vectorization of loops with multiple exits.

caused

FAIL: gcc.target/i386/sse4_1-stv-1.c scan-assembler-not pxor

with GCC configured with

../../gcc/configure 
--prefix=/export/users/haochenj/src/gcc-bisect/master/master/r14-6822/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/sse4_1-stv-1.c 
--target_board='unix{-m32}'"

(Please do not reply to this email. For questions about this report, contact me 
at haochen dot jiang at intel.com.)
(If you run into cascadelake-related problems, disabling AVX512F on the command 
line might help.)
(However, please make sure that there are no potential problems with AVX512.)


[r14-6822 Regression] FAIL: 25_algorithms/partition_point/constrained.cc -std=gnu++26 execution test on Linux/x86_64

2023-12-25 Thread haochen.jiang
On Linux/x86_64,

01f4251b8775c832a92d55e2df57c9ac72eaceef is the first bad commit
commit 01f4251b8775c832a92d55e2df57c9ac72eaceef
Author: Tamar Christina 
Date:   Sun Dec 24 19:18:12 2023 +

middle-end: Support vectorization of loops with multiple exits.

caused

FAIL: 25_algorithms/partition_point/constrained.cc  -std=gnu++20 execution test
FAIL: 25_algorithms/partition_point/constrained.cc  -std=gnu++26 execution test
FAIL: gcc.dg/tree-ssa/gen-vect-26.c scan-tree-dump-times vect "Alignment of 
access forced using peeling" 1
FAIL: gcc.dg/vect/slp-mask-store-1.c -flto -ffat-lto-objects  
scan-tree-dump-times vect "LOOP VECTORIZED" 1
FAIL: gcc.dg/vect/slp-mask-store-1.c scan-tree-dump-times vect "LOOP 
VECTORIZED" 1
FAIL: gcc.target/i386/vect-double-2.c scan-tree-dump-times vect "Vectorized 
loops: 1" 1
FAIL: gcc.target/i386/xorsign.c scan-assembler-not [ \t]or
FAIL: gcc.target/i386/xorsign.c scan-tree-dump-times vect "vectorized 2 loops" 1

with GCC configured with

../../gcc/configure 
--prefix=/export/users/haochenj/src/gcc-bisect/master/master/r14-6822/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/x86_64-linux/libstdc++-v3/testsuite && make check 
RUNTESTFLAGS="conformance.exp=25_algorithms/partition_point/constrained.cc 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="tree-ssa.exp=gcc.dg/tree-ssa/gen-vect-26.c 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="tree-ssa.exp=gcc.dg/tree-ssa/gen-vect-26.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="vect.exp=gcc.dg/vect/slp-mask-store-1.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="vect.exp=gcc.dg/vect/slp-mask-store-1.c 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/vect-double-2.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/vect-double-2.c 
--target_board='unix{-m64\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/xorsign.c --target_board='unix{-m32\ 
-march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="i386.exp=gcc.target/i386/xorsign.c --target_board='unix{-m64\ 
-march=cascadelake}'"

(Please do not reply to this email. For questions about this report, contact me 
at haochen dot jiang at intel.com.)
(If you run into cascadelake-related problems, disabling AVX512F on the command 
line might help.)
(However, please make sure that there are no potential problems with AVX512.)


[r14-6796 Regression] FAIL: g++.dg/abi/mangle-regparm1a.C -std=gnu++98 (test for warnings, line 6) on Linux/x86_64

2023-12-25 Thread haochen.jiang
On Linux/x86_64,

2fa122cae50cd87c1262c4ec18a783ee9bbbdaaa is the first bad commit
commit 2fa122cae50cd87c1262c4ec18a783ee9bbbdaaa
Author: Jason Merrill 
Date:   Thu Dec 21 16:16:37 2023 -0500

testsuite: suppress mangling compatibility aliases

caused

FAIL: g++.dg/abi/mangle-regparm1a.C  -std=gnu++14  (test for warnings, line 6)
FAIL: g++.dg/abi/mangle-regparm1a.C  -std=gnu++17  (test for warnings, line 6)
FAIL: g++.dg/abi/mangle-regparm1a.C  -std=gnu++20  (test for warnings, line 6)
FAIL: g++.dg/abi/mangle-regparm1a.C  -std=gnu++98  (test for warnings, line 6)

with GCC configured with

../../gcc/configure 
--prefix=/export/users/haochenj/src/gcc-bisect/master/master/r14-6796/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=g++.dg/abi/mangle-regparm1a.C --target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=g++.dg/abi/mangle-regparm1a.C --target_board='unix{-m32\ 
-march=cascadelake}'"

(Please do not reply to this email. For questions about this report, contact me 
at haochen dot jiang at intel.com.)
(If you run into cascadelake-related problems, disabling AVX512F on the command 
line might help.)
(However, please make sure that there are no potential problems with AVX512.)


Re: Ping: [PATCH] LoongArch: Replace -mexplicit-relocs=auto simple-used address peephole2 with combine

2023-12-25 Thread Xi Ruoyao
On Mon, 2023-12-25 at 10:08 +0800, chenglulu wrote:
> 
> 在 2023/12/24 下午8:59, Xi Ruoyao 写道:
> > On Sat, 2023-12-23 at 18:47 +0800, Xi Ruoyao wrote:
> > > On Sat, 2023-12-23 at 18:44 +0800, Xi Ruoyao wrote:
> > > > On Sat, 2023-12-23 at 10:29 +0800, chenglulu wrote:
> > > > > > The performance drop has nothing to do with this patch. I
> > > > > > found that the h264 performance compiled
> > > > > > by r14-6787 compared to r14-6421 dropped by 6.4%.
> > > > Then I guess we should create a bug report...
> > > > 
> > > > >   But there is a problem. My regression test has the following
> > > > > two fail items.(based on r14-6787)
> > > > > +FAIL: gcc.dg/cpp/_Pragma3.c (test for excess errors)
> > > I guess this is https://gcc.gnu.org/PR28123.
> > > 
> > > > > +FAIL: gcc.dg/pr86617.c scan-rtl-dump-times final "mem/v" 6
> > > I'll take a look on this.  Maybe it will show up with Binutils
> > > trunk (I
> > > just realized I tested this patch with Binutils 2.41, and it's not
> > > sufficient to really test the change).
> > I cannot reproduce the issue on a Gentoo dev machine with Binutils
> > 2.41.50.20231218 and the patch on top of r14-6819.  And in my manual
> > testing (for ruling out the difference caused by default PIE and
> > SSP)
> > the test also passes:
> > 
> > xry111@nanmen2 ~/git-repos/gcc-build $ /home/xry111/git-repos/gcc-
> > build/gcc/xgcc -B/home/xry111/git-repos/gcc-build/gcc/
> > /home/xry111/git-
> > repos/gcc/gcc/testsuite/gcc.dg/pr86617.c -fdiagnostics-plain-output
> > -Os
> > -fdump-rtl-final -ffat-lto-objects -S -o pr86617.s -fno-stack-
> > protector
> > -fno-pie && grep -c mem/v pr86617.c.348r.final
> > 6
> > 
> > Could you recheck with latest GCC master?
> Ok, I'll test again with the latest code.

Per https://gcc.gnu.org/pipermail/gcc-patches/2023-December/641407.html
I need to add "&& true" into the split condition.  I'll test it and send
V2.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH v1] LoongArch: Fixed bug in *bstrins__for_ior_mask template.

2023-12-25 Thread Xi Ruoyao
On Mon, 2023-12-25 at 11:20 +0800, Li Wei wrote:
> We found that using the latest compiled gcc will cause a miscompare error
> when running spec2006 400.perlbench test with -flto turned on.  After testing,
> it was found that only the LoongArch architecture will report errors.
> The first error commit was located through the git bisect command as
> r14-3773-g5b857e87201335.  Through debugging, it was found that the problem
> was that the split condition of the *bstrins__for_ior_mask template was
> empty, which should actually be consistent with the insn condition.
> 
> gcc/ChangeLog:
> 
>   * config/loongarch/loongarch.md: Adjust.

LGTM.  I had some misunderstandings here.  Sorry for the bug.

If a test case can be minimized and added it would be better, but I
understand that for a LTO-revealed issue it can be difficult to do so.

> ---
>  gcc/config/loongarch/loongarch.md | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/config/loongarch/loongarch.md 
> b/gcc/config/loongarch/loongarch.md
> index 7021105b241..2b0609f2f31 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -1489,7 +1489,7 @@ (define_insn_and_split "*bstrins__for_ior_mask"
>    "loongarch_pre_reload_split () && \
>     loongarch_use_bstrins_for_ior_with_mask (mode, operands)"
>    "#"
> -  ""
> +  "&& true"
>    [(set (match_dup 0) (match_dup 1))
>     (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 4))
>   (match_dup 3))]

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[PATCH] RISC-V: Move RVV V_REGS liveness computation into analyze_loop_vinfo

2023-12-25 Thread Juzhe-Zhong
Currently, we compute RVV V_REGS liveness during better_main_loop_than_p, which 
is not an appropriate
time to do that.  For example, when the code will finally pick the 
LMUL = 8 vectorization
factor, we compute liveness for LMUL = 8 multiple times, which is redundant.

We already follow the approach of the current ARM SVE cost model:

  /* Do one-time initialization based on the vinfo.  */
  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
  if (!m_analyzed_vinfo)
{
  if (loop_vinfo)
analyze_loop_vinfo (loop_vinfo);

  m_analyzed_vinfo = true;
}

This way the analysis runs only once for each cost model.

So here we move dynamic LMUL liveness information into analyze_loop_vinfo.

/* Do one-time initialization of the costs given that we're
   costing the loop vectorization described by LOOP_VINFO.  */
void
costs::analyze_loop_vinfo (loop_vec_info loop_vinfo)
{
  ...

  /* Detect whether the LOOP has unexpected spills.  */
  record_potential_unexpected_spills (loop_vinfo);
}

This way we avoid redundant computations, and the current dynamic LMUL cost 
model flow becomes much
more reasonable and consistent with the others.

Tested on RV32 and RV64 no regressions.

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc (compute_estimated_lmul): Allow 
fractional vector.
(preferred_new_lmul_p): Move RVV V_REGS liveness computation into 
analyze_loop_vinfo.
(has_unexpected_spills_p): New function.
(costs::record_potential_unexpected_spills): Ditto.
(costs::better_main_loop_than_p): Move RVV V_REGS liveness computation 
into analyze_loop_vinfo.
* config/riscv/riscv-vector-costs.h: New functions and variables.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul-mixed-1.c: Robustify 
test.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-1.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-2.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-3.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-4.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-5.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-6.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-7.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-1.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-2.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-3.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-4.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-5.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-6.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-1.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-10.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-2.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-7.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-1.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-10.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-11.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-2.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-3.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-4.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-5.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-6.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-7.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-8.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/no-dynamic-lmul-1.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/pr111848.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/pr113112-1.c: Ditto.

---
 gcc/config/riscv/riscv-vector-costs.cc| 110 +++---
 gcc/config/riscv/riscv-vector-costs.h |   8 ++
 .../riscv/rvv/dynamic-lmul-mixed-1.c  |   5 +-
 .../costmodel/riscv/rvv/dynamic-lmul1-1.c |   5 +-
 .../costmodel/riscv/rvv/dynamic-lmul1-2.c |   5 +-
 .../costmodel/riscv/rvv/dynamic-lmul1-3.c |   5 +-
 .../costmodel/riscv/rvv/dynamic-lmul1-4.c |   5 +-
 .../costmodel/riscv/rvv/dynamic-lmul1-5.c |   5 +-
 .../costmodel/riscv/rvv/dynamic-lmul1-6.c |   5 +-
 .../costmodel/riscv/rvv/dynamic-lmul1-7.c |   5 +-
 .../costmodel/riscv/rvv/dynamic-lmul2-1.c |   5 +-
 .../costmodel/riscv/rvv/dynamic-lmul2-2.c |   5 +-
 .../costmodel/riscv/rvv/dynamic-lmul2-3.c |   5 +-
 .../costmodel/riscv/rvv/dynamic-lmul2-4.c |   5 +-
 .../costmodel/riscv/rvv/dynamic

[PATCH] RISC-V: Fix misaligned stack offset for interrupt function

2023-12-25 Thread Kito Cheng
An `interrupt` function backs up the fcsr register, but the size of its save
slot is fixed to that of SImode.  That is not a big issue in itself, since fcsr
only uses 8 bits so far; however, the offset should still use UNITS_PER_WORD to
prevent the stack offset from becoming non-8-byte-aligned, which causes
problems on RV64.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_for_each_saved_reg): Adjust the
offset of fcsr.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/interrupt-misaligned.c: New.
---
 gcc/config/riscv/riscv.cc |  4 ++-
 .../gcc.target/riscv/interrupt-misaligned.c   | 29 +++
 2 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d867c0a03f0..c2b24d3db5a 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -6790,7 +6790,9 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, 
riscv_save_restore_fn fn,
  || (TARGET_ZFINX
  && (cfun->machine->frame.mask & ~(1 << 
RISCV_PROLOGUE_TEMP_REGNUM)
{
- unsigned int fcsr_size = GET_MODE_SIZE (SImode);
+ /* Always assume FCSR occupy UNITS_PER_WORD to prevent stack
+offset misaligned later.  */
+ unsigned int fcsr_size = UNITS_PER_WORD;
  if (!epilogue)
{
  riscv_save_restore_reg (word_mode, regno, offset, fn);
diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c 
b/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
new file mode 100644
index 000..b5f8e6c2bbe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc -mabi=lp64d -fno-schedule-insns 
-fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" } } */
+
+/*  Make sure no stack offset are misaligned.
+**  interrupt:
+**  ...
+**sd\tt0,40\(sp\)
+**frcsr\tt0
+**sw\tt0,32\(sp\)
+**sd\tt1,24\(sp\)
+**fsd\tft0,8\(sp\)
+**  ...
+**lw\tt0,32\(sp\)
+**fscsr\tt0
+**ld\tt0,40\(sp\)
+**ld\tt1,24\(sp\)
+**fld\tft0,8\(sp\)
+**  ...
+*/
+
+
+void interrupt(void) __attribute__((interrupt));
+void interrupt(void)
+{
+  asm volatile ("# clobber!":::"t0", "t1", "ft0");
+}
+
+/* { dg-final { check-function-bodies "**" "" } } */
-- 
2.40.1



Re: [r14-6770 Regression] FAIL: gcc.dg/gnu23-tag-4.c (test for excess errors) on Linux/x86_64

2023-12-25 Thread Martin Uecker


Yes, I am testing a patch. The DECL_FIELD_BIT_OFFSET values are set
inconsistently for some reason.

Martin 

Am Montag, dem 25.12.2023 um 07:49 + schrieb Jiang, Haochen:
> It is not a target specific issue, it will fail if we enabled AVX.
> 
> e.g.:
> 
> $ /export/users/haochenj/env/build_no_bootstrap_master/gcc/xgcc 
> -B/export/users/haochenj/env/build_no_bootstrap_master/gcc/  
> /export/users/haochenj/src/gcc/master/gcc/testsuite/gcc.dg/gnu23-tag-4.c  
> -m64 -mavx   -fdiagnostics-plain-output   -std=gnu23 -S -o gnu23-tag-4.s
> /export/users/haochenj/src/gcc/master/gcc/testsuite/gcc.dg/gnu23-tag-4.c: In 
> function ‘bar’:
> /export/users/haochenj/src/gcc/master/gcc/testsuite/gcc.dg/gnu23-tag-4.c:18:47:
>  error: initialization of ‘struct g *’ from incompatible pointer type ‘struct 
> g *’ [-Wincompatible-pointer-types]
> 
> Thx,
> Haochen
> 
> > -Original Message-
> > From: Martin Uecker 
> > Sent: Friday, December 22, 2023 5:39 PM
> > To: gcc-regress...@gcc.gnu.org; gcc-patches@gcc.gnu.org; Jiang, Haochen
> > ; Joseph Myers 
> > Subject: Re: [r14-6770 Regression] FAIL: gcc.dg/gnu23-tag-4.c (test for 
> > excess
> > errors) on Linux/x86_64
> > 
> > 
> > Hm, this is weird, as it really seems to depend on the -march=  So if 
> > there is
> > really a difference between those structs which make them incompatible on
> > some archs, we should not consider them to be compatible in general.
> > 
> > struct g { int a[n]; int b; } *y;
> > { struct g { int a[4]; int b; } *y2 = y; }
> > 
> > But I do not see what could go wrong here as sizeof / alignment is the same 
> > for
> > n = 4.  So there is something else I missed
> > 
> > 
> > 
> > Am Freitag, dem 22.12.2023 um 05:07 +0800 schrieb haochen.jiang:
> > > On Linux/x86_64,
> > > 
> > > 23fee88f84873b0b8b41c8e5a9b229d533fb4022 is the first bad commit
> > > commit 23fee88f84873b0b8b41c8e5a9b229d533fb4022
> > > Author: Martin Uecker 
> > > Date:   Tue Aug 15 14:58:32 2023 +0200
> > > 
> > > c23: tag compatibility rules for struct and unions
> > > 
> > > caused
> > > 
> > > FAIL: gcc.dg/gnu23-tag-4.c (test for excess errors)
> > > 
> > > with GCC configured with
> > > 
> > > ../../gcc/configure
> > > --prefix=/export/users/haochenj/src/gcc-bisect/master/master/r14-6770/
> > > usr --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld
> > > --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet
> > > --without-isl --enable-libmpx x86_64-linux --disable-bootstrap
> > > 
> > > To reproduce:
> > > 
> > > $ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/gnu23-
> > tag-4.c --target_board='unix{-m32\ -march=cascadelake}'"
> > > $ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/gnu23-
> > tag-4.c --target_board='unix{-m64\ -march=cascadelake}'"
> > > 
> > > (Please do not reply to this email, for question about this report,
> > > contact me at haochen dot jiang at intel.com.) (If you met problems
> > > with cascadelake related, disabling AVX512F in command line might save
> > that.) (However, please make sure that there is no potential problems with
> > AVX512.)
> 



Re: [PATCH v4 4/6] RISC-V: Adds the prefix "th." for the instructions of XTheadVector.

2023-12-25 Thread juzhe.zh...@rivai.ai
OK.



juzhe.zh...@rivai.ai
 
From: Jun Sha (Joshua)
Date: 2023-12-25 16:14
To: gcc-patches
CC: jim.wilson.gcc; palmer; andrew; philipp.tomsich; jeffreyalaw; 
christoph.muellner; juzhe.zhong; Jun Sha (Joshua); Jin Ma; Xianmiao Qu
Subject: [PATCH v4 4/6] RISC-V: Adds the prefix "th." for the instructions of 
XTheadVector.
This patch adds the "th." prefix to all XTheadVector instructions by
implementing new assembly output functions. In this version, we 
follow Kito's suggestion and only check whether the opcode starts with 'v', 
so that no extra attribute is needed.
 
gcc/ChangeLog:
 
* config/riscv/riscv-protos.h (riscv_asm_output_opcode): 
New function to add assembler insn code prefix/suffix.
* config/riscv/riscv.cc (riscv_asm_output_opcode): Likewise.
* config/riscv/riscv.h (ASM_OUTPUT_OPCODE): Likewise.
 
Co-authored-by: Jin Ma 
Co-authored-by: Xianmiao Qu 
Co-authored-by: Christoph Müllner 
---
gcc/config/riscv/riscv-protos.h   |  1 +
gcc/config/riscv/riscv.cc | 19 +++
gcc/config/riscv/riscv.h  |  4 
.../riscv/rvv/xtheadvector/prefix.c   | 12 
4 files changed, 36 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 31049ef7523..5ea54b45703 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -102,6 +102,7 @@ struct riscv_address_info {
};
/* Routines implemented in riscv.cc.  */
+extern const char *riscv_asm_output_opcode (FILE *asm_out_file, const char *p);
extern enum riscv_symbol_type riscv_classify_symbolic_expression (rtx);
extern bool riscv_symbolic_constant_p (rtx, enum riscv_symbol_type *);
extern int riscv_float_const_rtx_index_for_fli (rtx);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0d1cbc5cb5f..30e6ced5f3f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5636,6 +5636,25 @@ riscv_get_v_regno_alignment (machine_mode mode)
   return lmul;
}
+/* Define ASM_OUTPUT_OPCODE to do anything special before
+   emitting an opcode.  */
+const char *
+riscv_asm_output_opcode (FILE *asm_out_file, const char *p)
+{
+  if (!TARGET_XTHEADVECTOR)
+return p;
+
+  if (current_output_insn == NULL_RTX)
+return p;
+
+  /* We need to add th. prefix to all the xtheadvector
+ insturctions here.*/
+  if (p[0] == 'v')
+fputs ("th.", asm_out_file);
+
+  return p;
+}
+
/* Implement TARGET_PRINT_OPERAND.  The RISCV-specific operand codes are:
'h' Print the high-part relocation associated with OP, after stripping
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 6df9ec73c5e..c33361a254d 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -826,6 +826,10 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
   asm_fprintf ((FILE), "%U%s", (NAME)); \
   } while (0)
+#undef ASM_OUTPUT_OPCODE
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+  (PTR) = riscv_asm_output_opcode(STREAM, PTR)
+
#define JUMP_TABLES_IN_TEXT_SECTION 0
#define CASE_VECTOR_MODE SImode
#define CASE_VECTOR_PC_RELATIVE (riscv_cmodel != CM_MEDLOW)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c
new file mode 100644
index 000..48867f4ddfb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_xtheadvector -mabi=ilp32 -O0" } */
+
+#include "riscv_vector.h"
+
+vint32m1_t
+prefix (vint32m1_t vx, vint32m1_t vy, size_t vl)
+{
+  return __riscv_vadd_vv_i32m1 (vx, vy, vl);
+}
+
+/* { dg-final { scan-assembler {\mth\.v\M} } } */
-- 
2.17.1
 
 


[PATCH v4 4/6] RISC-V: Adds the prefix "th." for the instructions of XTheadVector.

2023-12-25 Thread Jun Sha (Joshua)
This patch adds the "th." prefix to all XTheadVector instructions by
implementing new assembly output functions. In this version, we 
follow Kito's suggestion and only check whether the opcode starts with 'v', 
so that no extra attribute is needed.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_asm_output_opcode): 
New function to add assembler insn code prefix/suffix.
* config/riscv/riscv.cc (riscv_asm_output_opcode): Likewise.
* config/riscv/riscv.h (ASM_OUTPUT_OPCODE): Likewise.

Co-authored-by: Jin Ma 
Co-authored-by: Xianmiao Qu 
Co-authored-by: Christoph Müllner 
---
 gcc/config/riscv/riscv-protos.h   |  1 +
 gcc/config/riscv/riscv.cc | 19 +++
 gcc/config/riscv/riscv.h  |  4 
 .../riscv/rvv/xtheadvector/prefix.c   | 12 
 4 files changed, 36 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 31049ef7523..5ea54b45703 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -102,6 +102,7 @@ struct riscv_address_info {
 };
 
 /* Routines implemented in riscv.cc.  */
+extern const char *riscv_asm_output_opcode (FILE *asm_out_file, const char *p);
 extern enum riscv_symbol_type riscv_classify_symbolic_expression (rtx);
 extern bool riscv_symbolic_constant_p (rtx, enum riscv_symbol_type *);
 extern int riscv_float_const_rtx_index_for_fli (rtx);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0d1cbc5cb5f..30e6ced5f3f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -5636,6 +5636,25 @@ riscv_get_v_regno_alignment (machine_mode mode)
   return lmul;
 }
 
+/* Define ASM_OUTPUT_OPCODE to do anything special before
+   emitting an opcode.  */
+const char *
+riscv_asm_output_opcode (FILE *asm_out_file, const char *p)
+{
+  if (!TARGET_XTHEADVECTOR)
+return p;
+
+  if (current_output_insn == NULL_RTX)
+return p;
+
+  /* We need to add th. prefix to all the xtheadvector
+ insturctions here.*/
+  if (p[0] == 'v')
+fputs ("th.", asm_out_file);
+
+  return p;
+}
+
 /* Implement TARGET_PRINT_OPERAND.  The RISCV-specific operand codes are:
 
'h' Print the high-part relocation associated with OP, after stripping
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 6df9ec73c5e..c33361a254d 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -826,6 +826,10 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
   asm_fprintf ((FILE), "%U%s", (NAME));\
   } while (0)
 
+#undef ASM_OUTPUT_OPCODE
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+  (PTR) = riscv_asm_output_opcode(STREAM, PTR)
+
 #define JUMP_TABLES_IN_TEXT_SECTION 0
 #define CASE_VECTOR_MODE SImode
 #define CASE_VECTOR_PC_RELATIVE (riscv_cmodel != CM_MEDLOW)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c
new file mode 100644
index 000..48867f4ddfb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/prefix.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_xtheadvector -mabi=ilp32 -O0" } */
+
+#include "riscv_vector.h"
+
+vint32m1_t
+prefix (vint32m1_t vx, vint32m1_t vy, size_t vl)
+{
+  return __riscv_vadd_vv_i32m1 (vx, vy, vl);
+}
+
+/* { dg-final { scan-assembler {\mth\.v\M} } } */
-- 
2.17.1