Re:[pushed] [PATCH 0/1] LoongArch: Remove gawk extension from a generator script.

2024-08-01 Thread Lulu Cheng

Pushed to r15-2660.

在 2024/7/23 上午10:04, Yang Yujie 写道:

Builds for the LoongArch target fail if the system "awk" is not "gawk".
This patch removes this unnecessary requirement.

Thanks to Jan-Benedict Glaw for finding and reporting
this issue.

Yang Yujie (1):
   LoongArch: Remove gawk extension from a generator script.

  gcc/config/loongarch/genopts/gen-evolution.awk | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)





[PATCH v2] LoongArch: Use iorn and andn standard pattern names.

2024-08-01 Thread Lulu Cheng
r15-1890 introduced the new optabs iorc and andc and their corresponding
internal functions BIT_{ANDC,IORC}, for use when a target defines such optabs
for vector modes.  In r15-2258, iorc and andc were renamed to
iorn and andn.
So we rename the andn and iorn implementation patterns to the standard
pattern names.

---
v1 -> v2:
   - Fixed bugs with the [x]vandn implementation in the previous
 version.
   - Add testcases.
   


gcc/ChangeLog:

* config/loongarch/lasx.md (xvandn<mode>3): Rename to ...
(andn<mode>3): This.
(xvorn<mode>3): Rename to ...
(iorn<mode>3): This.
* config/loongarch/loongarch-builtins.cc
(CODE_FOR_lsx_vandn_v): Defined as the modified name.
(CODE_FOR_lsx_vorn_v): Likewise.
(CODE_FOR_lasx_xvandn_v): Likewise.
(CODE_FOR_lasx_xvorn_v): Likewise.
(loongarch_expand_builtin_insn): When the builtin function to be
called is __builtin_lasx_xvandn or __builtin_lsx_vandn, swap the
two operands.
* config/loongarch/loongarch.md (<optab>n<mode>): Rename to ...
(<optab>n<mode>3): This.
* config/loongarch/lsx.md (vandn<mode>3): Rename to ...
(andn<mode>3): This.
(vorn<mode>3): Rename to ...
(iorn<mode>3): This.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/lasx-andn-iorn.c: New test.
* gcc.target/loongarch/lsx-andn-iorn.c: New test.
---
 gcc/config/loongarch/lasx.md  | 10 +++
 gcc/config/loongarch/loongarch-builtins.cc| 10 ---
 gcc/config/loongarch/loongarch.md |  8 +++---
 gcc/config/loongarch/lsx.md   | 10 +++
 .../gcc.target/loongarch/lasx-andn-iorn.c | 11 
 .../gcc.target/loongarch/lsx-andn-iorn.c  | 28 +++
 6 files changed, 59 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 7bd61f8ed5b..ca523880683 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -2716,12 +2716,12 @@ (define_insn "lasx_vext2xv_d_b"
(set_attr "mode" "V4DI")])
 
 ;; Extend loongson-sx to loongson-asx.
-(define_insn "xvandn3"
+(define_insn "andn3"
   [(set (match_operand:LASX 0 "register_operand" "=f")
-   (and:LASX (not:LASX (match_operand:LASX 1 "register_operand" "f"))
-   (match_operand:LASX 2 "register_operand" "f")))]
+   (and:LASX (not:LASX (match_operand:LASX 2 "register_operand" "f"))
+   (match_operand:LASX 1 "register_operand" "f")))]
   "ISA_HAS_LASX"
-  "xvandn.v\t%u0,%u1,%u2"
+  "xvandn.v\t%u0,%u2,%u1"
   [(set_attr "type" "simd_logic")
(set_attr "mode" "")])
 
@@ -4637,7 +4637,7 @@ (define_insn "lasx_xvssrlrn__"
   [(set_attr "type" "simd_int_arith")
(set_attr "mode" "")])
 
-(define_insn "xvorn3"
+(define_insn "iorn3"
   [(set (match_operand:ILASX 0 "register_operand" "=f")
(ior:ILASX (not:ILASX (match_operand:ILASX 2 "register_operand" "f"))
   (match_operand:ILASX 1 "register_operand" "f")))]
diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc
index fbe46833c9b..cf92770de30 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -458,8 +458,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
 #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du
 #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s
 #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d
-#define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3
-#define CODE_FOR_lsx_vorn_v CODE_FOR_vornv16qi3
+#define CODE_FOR_lsx_vandn_v CODE_FOR_andnv16qi3
+#define CODE_FOR_lsx_vorn_v CODE_FOR_iornv16qi3
 #define CODE_FOR_lsx_vneg_b CODE_FOR_vnegv16qi2
 #define CODE_FOR_lsx_vneg_h CODE_FOR_vnegv8hi2
 #define CODE_FOR_lsx_vneg_w CODE_FOR_vnegv4si2
@@ -692,8 +692,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
 #define CODE_FOR_lasx_xvrepli_w CODE_FOR_lasx_xvrepliv8si
 #define CODE_FOR_lasx_xvrepli_d CODE_FOR_lasx_xvrepliv4di
 
-#define CODE_FOR_lasx_xvandn_v CODE_FOR_xvandnv32qi3
-#define CODE_FOR_lasx_xvorn_v CODE_FOR_xvornv32qi3
+#define CODE_FOR_lasx_xvandn_v CODE_FOR_andnv32qi3
+#define CODE_FOR_lasx_xvorn_v CODE_FOR_iornv32qi3
 #define CODE_FOR_lasx_xvneg_b CODE_FOR_negv32qi2
 #define CODE_FOR_lasx_xvneg_h CODE_FOR_negv16hi2
 #define CODE_FOR_lasx_xvneg_w CODE_FOR_negv8si2
@@ -2858,6 +2858,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, 
unsigned int nops,
 case CODE_FOR_lsx_vpickod_b:
 case CODE_FOR_lsx_vpickod_h:
 case CODE_FOR_lsx_vpickod_w:
+case CODE_FOR_lsx_vandn_v:
 case CODE_FOR_lasx_xvilvh_b:
 case CODE_FOR_lasx_xvilvh_h:
 case CODE_FOR_lasx_xvilvh_w:
@@ -2878,6 +2879,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, 
unsigned int nops,
 case 

[PATCH v2] LoongArch: Use iorn and andn standard pattern names.

2024-08-01 Thread Lulu Cheng
r15-1890 introduced the new optabs iorc and andc and their corresponding
internal functions BIT_{ANDC,IORC}, for use when a target defines such optabs
for vector modes.  In r15-2258, iorc and andc were renamed to
iorn and andn.
So we rename the andn and iorn implementation patterns to the standard
pattern names.

---
v1 -> v2:
   - Fixed bugs with the [x]vandn implementation in the previous
 version.
   - Add testcases.
   


gcc/ChangeLog:

* config/loongarch/lasx.md (xvandn<mode>3): Rename to ...
(andn<mode>3): This.
(xvorn<mode>3): Rename to ...
(iorn<mode>3): This.
* config/loongarch/loongarch-builtins.cc
(CODE_FOR_lsx_vandn_v): Defined as the modified name.
(CODE_FOR_lsx_vorn_v): Likewise.
(CODE_FOR_lasx_xvandn_v): Likewise.
(CODE_FOR_lasx_xvorn_v): Likewise.
(loongarch_expand_builtin_insn): When the builtin function to be
called is __builtin_lasx_xvandn or __builtin_lsx_vandn, swap the
two operands.
* config/loongarch/loongarch.md (<optab>n<mode>): Rename to ...
(<optab>n<mode>3): This.
* config/loongarch/lsx.md (vandn<mode>3): Rename to ...
(andn<mode>3): This.
(vorn<mode>3): Rename to ...
(iorn<mode>3): This.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/lasx-andn-iorn.c: New test.
* gcc.target/loongarch/lsx-andn-iorn.c: New test.
---
 gcc/config/loongarch/lasx.md  | 10 +++
 gcc/config/loongarch/loongarch-builtins.cc| 10 ---
 gcc/config/loongarch/loongarch.md |  8 +++---
 gcc/config/loongarch/lsx.md   | 10 +++
 .../gcc.target/loongarch/lasx-andn-iorn.c | 11 
 .../gcc.target/loongarch/lsx-andn-iorn.c  | 28 +++
 6 files changed, 59 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-andn-iorn.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/lsx-andn-iorn.c

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 7bd61f8ed5b..ca523880683 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -2716,12 +2716,12 @@ (define_insn "lasx_vext2xv_d_b"
(set_attr "mode" "V4DI")])
 
 ;; Extend loongson-sx to loongson-asx.
-(define_insn "xvandn3"
+(define_insn "andn3"
   [(set (match_operand:LASX 0 "register_operand" "=f")
-   (and:LASX (not:LASX (match_operand:LASX 1 "register_operand" "f"))
-   (match_operand:LASX 2 "register_operand" "f")))]
+   (and:LASX (not:LASX (match_operand:LASX 2 "register_operand" "f"))
+   (match_operand:LASX 1 "register_operand" "f")))]
   "ISA_HAS_LASX"
-  "xvandn.v\t%u0,%u1,%u2"
+  "xvandn.v\t%u0,%u2,%u1"
   [(set_attr "type" "simd_logic")
(set_attr "mode" "")])
 
@@ -4637,7 +4637,7 @@ (define_insn "lasx_xvssrlrn__"
   [(set_attr "type" "simd_int_arith")
(set_attr "mode" "")])
 
-(define_insn "xvorn3"
+(define_insn "iorn3"
   [(set (match_operand:ILASX 0 "register_operand" "=f")
(ior:ILASX (not:ILASX (match_operand:ILASX 2 "register_operand" "f"))
   (match_operand:ILASX 1 "register_operand" "f")))]
diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc
index fbe46833c9b..cf92770de30 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -458,8 +458,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
 #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du
 #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s
 #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d
-#define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3
-#define CODE_FOR_lsx_vorn_v CODE_FOR_vornv16qi3
+#define CODE_FOR_lsx_vandn_v CODE_FOR_andnv16qi3
+#define CODE_FOR_lsx_vorn_v CODE_FOR_iornv16qi3
 #define CODE_FOR_lsx_vneg_b CODE_FOR_vnegv16qi2
 #define CODE_FOR_lsx_vneg_h CODE_FOR_vnegv8hi2
 #define CODE_FOR_lsx_vneg_w CODE_FOR_vnegv4si2
@@ -692,8 +692,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
 #define CODE_FOR_lasx_xvrepli_w CODE_FOR_lasx_xvrepliv8si
 #define CODE_FOR_lasx_xvrepli_d CODE_FOR_lasx_xvrepliv4di
 
-#define CODE_FOR_lasx_xvandn_v CODE_FOR_xvandnv32qi3
-#define CODE_FOR_lasx_xvorn_v CODE_FOR_xvornv32qi3
+#define CODE_FOR_lasx_xvandn_v CODE_FOR_andnv32qi3
+#define CODE_FOR_lasx_xvorn_v CODE_FOR_iornv32qi3
 #define CODE_FOR_lasx_xvneg_b CODE_FOR_negv32qi2
 #define CODE_FOR_lasx_xvneg_h CODE_FOR_negv16hi2
 #define CODE_FOR_lasx_xvneg_w CODE_FOR_negv8si2
@@ -2858,6 +2858,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, 
unsigned int nops,
 case CODE_FOR_lsx_vpickod_b:
 case CODE_FOR_lsx_vpickod_h:
 case CODE_FOR_lsx_vpickod_w:
+case CODE_FOR_lsx_vandn_v:
 case CODE_FOR_lasx_xvilvh_b:
 case CODE_FOR_lasx_xvilvh_h:
 case CODE_FOR_lasx_xvilvh_w:
@@ -2878,6 +2879,7 @@ loongarch_expand_builtin_insn (enum insn_code icode, 
unsigned int nops,
 case 

Re: [PATCH] LoongArch: Rework bswap{hi,si,di}2 definition

2024-07-31 Thread Lulu Cheng



在 2024/7/31 下午6:25, Xi Ruoyao 写道:

On Wed, 2024-07-31 at 16:57 +0800, Lulu Cheng wrote:

在 2024/7/29 下午3:58, Xi Ruoyao 写道:

Per a gcc-help thread we are generating sub-optimal code for
__builtin_bswap{32,64}.  To fix it:

- Use a single revb.d instruction for bswapdi2.
- Use a single revb.2w instruction for bswapsi2 for TARGET_64BIT,
     revb.2h + rotri.w for !TARGET_64BIT.
- Use a single revb.2h instruction for bswapsi2 (x) r>> 16, and a single
     revb.2w instruction for bswapdi2 (x) r>> 32.

Unfortunately I cannot figure out a way to make the compiler generate
revb.4h or revh.{2w,d} instructions.

This optimization is really ingenious and I have no objections.

I also haven't figured out how to generate revb.4h or revh.{2w,d}.
I think we can merge this patch first.

Pushed r15-2433.

Ok. Thanks!


FWIW I tried a naive pattern for revh.2w:

(set (match_operand:DI 0 "register_operand" "=r")
  (ior:DI
(and:DI
  (ashift:DI (match_operand:DI 1 "register_operand" "r")
 (const_int 16))
  (const_int 18446462603027742720))
(and:DI
  (lshiftrt:DI (match_dup 1)
   (const_int 16))
  (const_int 281470681808895))))
But it seems too complex to be recognized.


I think it needs to be recognized as a bswap operation in the tree-bswap
phase, but that seems a bit difficult to achieve.







Re: [PATCH] LoongArch: Rework bswap{hi,si,di}2 definition

2024-07-31 Thread Lulu Cheng



在 2024/7/29 下午3:58, Xi Ruoyao 写道:

Per a gcc-help thread we are generating sub-optimal code for
__builtin_bswap{32,64}.  To fix it:

- Use a single revb.d instruction for bswapdi2.
- Use a single revb.2w instruction for bswapsi2 for TARGET_64BIT,
   revb.2h + rotri.w for !TARGET_64BIT.
- Use a single revb.2h instruction for bswapsi2 (x) r>> 16, and a single
   revb.2w instruction for bswapdi2 (x) r>> 32.

Unfortunately I cannot figure out a way to make the compiler generate
revb.4h or revh.{2w,d} instructions.


This optimization is really ingenious and I have no objections.

I also haven't figured out how to generate revb.4h or revh.{2w,d}.
I think we can merge this patch first.

Thanks.



gcc/ChangeLog:

* config/loongarch/loongarch.md (UNSPEC_REVB_2H, UNSPEC_REVB_4H,
UNSPEC_REVH_D): Remove UNSPECs.
(revb_4h, revh_d): Remove define_insn.
(revb_2h): Define as (rotatert:SI (bswap:SI x) 16) instead of
an UNSPEC.
(revb_2h_extend, revb_2w, *bswapsi2, bswapdi2): New define_insn.
(bswapsi2): Change to define_expand.  Only expand to revb.2h +
rotri.w if !TARGET_64BIT.
(bswapdi2): Change to define_insn of which the output is just a
revb.d instruction.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/revb.c: New test.
---

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?

  gcc/config/loongarch/loongarch.md | 79 ---
  gcc/testsuite/gcc.target/loongarch/revb.c | 61 +
  2 files changed, 104 insertions(+), 36 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/revb.c

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index ac94a22eafc..f166e834c56 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -20,11 +20,6 @@
  ;; .
  
  (define_c_enum "unspec" [

-  ;; Integer operations that are too cumbersome to describe directly.
-  UNSPEC_REVB_2H
-  UNSPEC_REVB_4H
-  UNSPEC_REVH_D
-
;; Floating-point moves.
UNSPEC_LOAD_LOW
UNSPEC_LOAD_HIGH
@@ -3155,55 +3150,67 @@ (define_insn "alslsi3_extend"
  
  ;; Reverse the order of bytes of operand 1 and store the result in operand 0.
  
-(define_insn "bswaphi2"

-  [(set (match_operand:HI 0 "register_operand" "=r")
-   (bswap:HI (match_operand:HI 1 "register_operand" "r")))]
+(define_insn "revb_2h"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+   (rotatert:SI (bswap:SI (match_operand:SI 1 "register_operand" "r"))
+(const_int 16)))]
""
"revb.2h\t%0,%1"
[(set_attr "type" "shift")])
  
-(define_insn_and_split "bswapsi2"

-  [(set (match_operand:SI 0 "register_operand" "=r")
-   (bswap:SI (match_operand:SI 1 "register_operand" "r")))]
-  ""
-  "#"
-  ""
-  [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_REVB_2H))
-   (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))]
-  ""
-  [(set_attr "insn_count" "2")])
-
-(define_insn_and_split "bswapdi2"
+(define_insn "revb_2h_extend"
[(set (match_operand:DI 0 "register_operand" "=r")
-   (bswap:DI (match_operand:DI 1 "register_operand" "r")))]
+   (sign_extend:DI
+ (rotatert:SI
+   (bswap:SI (match_operand:SI 1 "register_operand" "r"))
+   (const_int 16))))]
"TARGET_64BIT"
-  "#"
-  ""
-  [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_REVB_4H))
-   (set (match_dup 0) (unspec:DI [(match_dup 0)] UNSPEC_REVH_D))]
-  ""
-  [(set_attr "insn_count" "2")])
+  "revb.2h\t%0,%1"
+  [(set_attr "type" "shift")])
  
-(define_insn "revb_2h"

-  [(set (match_operand:SI 0 "register_operand" "=r")
-   (unspec:SI [(match_operand:SI 1 "register_operand" "r")] 
UNSPEC_REVB_2H))]
+(define_insn "bswaphi2"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+   (bswap:HI (match_operand:HI 1 "register_operand" "r")))]
""
"revb.2h\t%0,%1"
[(set_attr "type" "shift")])
  
-(define_insn "revb_4h"

+(define_insn "revb_2w"
[(set (match_operand:DI 0 "register_operand" "=r")
-   (unspec:DI [(match_operand:DI 1 "register_operand" "r")] 
UNSPEC_REVB_4H))]
+   (rotatert:DI (bswap:DI (match_operand:DI 1 "register_operand" "r"))
+(const_int 32)))]
"TARGET_64BIT"
-  "revb.4h\t%0,%1"
+  "revb.2w\t%0,%1"
[(set_attr "type" "shift")])
  
-(define_insn "revh_d"

+(define_insn "*bswapsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+   (bswap:SI (match_operand:SI 1 "register_operand" "r")))]
+  "TARGET_64BIT"
+  "revb.2w\t%0,%1"
+  [(set_attr "type" "shift")])
+
+(define_expand "bswapsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+   (bswap:SI (match_operand:SI 1 "register_operand" "r")))]
+  ""
+{
+  if (!TARGET_64BIT)
+{
+  rtx t = gen_reg_rtx (SImode);
+  emit_insn (gen_revb_2h (t, operands[1]));
+  emit_insn (gen_rotrsi3 (operands[0], t, GEN_INT (16)));
+  DONE;
+}
+})
+

Re: [PATCH] LoongArch: Relax ins_zero_bitmask_operand and remove and3_align

2024-07-31 Thread Lulu Cheng



在 2024/7/29 下午3:59, Xi Ruoyao 写道:

In r15-1207 I was too stupid to realize we just need to relax
ins_zero_bitmask_operand to allow using bstrins for aligning, instead of
adding a new split.  And, "> 12" in ins_zero_bitmask_operand also makes
no sense: it rejects bstrins for things like "x & ~4l" with no good
reason.

So fix my errors now.


LGTM!

Thanks!


gcc/ChangeLog:

* config/loongarch/predicates.md (ins_zero_bitmask_operand):
Cover more cases that bstrins can benefit.
(high_bitmask_operand): Remove.
* config/loongarch/constraints.md (Yy): Remove.
* config/loongarch/loongarch.md (and3_align): Remove.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/bstrins-4.c: New test.
---

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?

  gcc/config/loongarch/constraints.md|  4 
  gcc/config/loongarch/loongarch.md  | 17 -
  gcc/config/loongarch/predicates.md |  9 ++---
  gcc/testsuite/gcc.target/loongarch/bstrins-4.c |  9 +
  4 files changed, 11 insertions(+), 28 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-4.c

diff --git a/gcc/config/loongarch/constraints.md 
b/gcc/config/loongarch/constraints.md
index 12cf5e2924a..18da8b31f49 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -292,10 +292,6 @@ (define_constraint "Yx"
 "@internal"
 (match_operand 0 "low_bitmask_operand"))
  
-(define_constraint "Yy"

-   "@internal"
-   (match_operand 0 "high_bitmask_operand"))
-
  (define_constraint "YI"
"@internal
 A replicated vector const in which the replicated value is in the range
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index e1629c5a339..ac94a22eafc 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1588,23 +1588,6 @@ (define_insn "and3_extended"
[(set_attr "move_type" "pick_ins")
 (set_attr "mode" "")])
  
-(define_insn_and_split "and3_align"

-  [(set (match_operand:GPR 0 "register_operand" "=r")
-   (and:GPR (match_operand:GPR 1 "register_operand" "r")
-(match_operand:GPR 2 "high_bitmask_operand" "Yy")))]
-  ""
-  "#"
-  ""
-  [(set (match_dup 0) (match_dup 1))
-   (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0))
-   (const_int 0))]
-{
-  int len;
-
-  len = low_bitmask_len (mode, ~INTVAL (operands[2]));
-  operands[2] = GEN_INT (len);
-})
-
  (define_insn_and_split "*bstrins__for_mask"
[(set (match_operand:GPR 0 "register_operand" "=r")
(and:GPR (match_operand:GPR 1 "register_operand" "r")
diff --git a/gcc/config/loongarch/predicates.md 
b/gcc/config/loongarch/predicates.md
index 58e406ea522..95c2544cc2f 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -293,10 +293,6 @@ (define_predicate "low_bitmask_operand"
(and (match_code "const_int")
 (match_test "low_bitmask_len (mode, INTVAL (op)) > 12")))
  
-(define_predicate "high_bitmask_operand"

-  (and (match_code "const_int")
-   (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0")))
-
  (define_predicate "d_operand"
(and (match_code "reg")
 (match_test "GP_REG_P (REGNO (op))")))
@@ -406,11 +402,10 @@ (define_predicate "muldiv_target_operand"
  
  (define_predicate "ins_zero_bitmask_operand"

(and (match_code "const_int")
-   (match_test "INTVAL (op) != -1")
-   (match_test "INTVAL (op) & 1")
 (match_test "low_bitmask_len (mode, \
 ~UINTVAL (op) | (~UINTVAL(op) - 1)) \
-   > 12")))
+   > 0")
+   (not (match_operand 0 "const_uns_arith_operand"))))
  
  (define_predicate "const_call_insn_operand"

(match_code "const,symbol_ref,label_ref")
diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-4.c 
b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c
new file mode 100644
index 000..0823cfc386e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/bstrins-4.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,2,2" } } */
+
+long
+x (long a)
+{
+  return a & ~4;
+}




Re: [PATCH] LoongArch: Expand some SImode operations through "si3_extend" instructions if TARGET_64BIT

2024-07-30 Thread Lulu Cheng



在 2024/7/26 下午8:43, Xi Ruoyao 写道:

We already had "si3_extend" insns and we hoped the fwprop or combine
passes can use them to remove unnecessary sign extensions.  But this
does not always work: for cases like x << 1 | y, the compiler
tends to do

 (sign_extend:DI
   (ior:SI (ashift:SI (reg:SI $r4)
  (const_int 1))
   (reg:SI $r5)))

instead of

 (ior:DI (sign_extend:DI (ashift:SI (reg:SI $r4) (const_int 1)))
 (sign_extend:DI (reg:SI $r5)))

So we cannot match the ashlsi3_extend instruction here and we get:

 slli.w $r4,$r4,1
 or $r4,$r5,$r4
 slli.w $r4,$r4,0# <= redundant
 jr$r1

To eliminate this redundant extension we need to turn SImode shift etc.
to DImode "si3_extend" operations earlier, when we expand the SImode
operation.  We are already doing this for addition, now do it for
shifts, rotates, subtraction, multiplication, division, and modulo as
well.

The bytepick.w definition for TARGET_64BIT needs to be adjusted so it
won't be undone by the shift expanding.


LGTM!

I don't know if there will still be redundant sign-extension instructions
after this change. :-(


Thanks!



gcc/ChangeLog:

* config/loongarch/loongarch.md (optab): Add (rotatert "rotr").
(3, 3,
sub3, rotr3, mul3): Add a "*" to the insn name
so we can redefine the names with define_expand.
(*si3_extend): Remove "*" so we can use them
in expanders.
(*subsi3_extended, *mulsi3_extended): Likewise, also remove the
trailing "ed" for consistency.
(*si3_extended): Add mode for sign_extend to
prevent an ICE using it in expanders.
(shift_w, arith_w): New define_code_iterator.
(3): New define_expand.  Expand with
si3_extend for SImode if TARGET_64BIT.
(3): Likewise.
(mul3): Expand to mulsi3_extended for SImode if
TARGET_64BIT and ISA_HAS_DIV32.
(3): Expand to si3_extended
for SImode if TARGET_64BIT.
(rotl3): Expand to rotrsi3_extend for SImode if
TARGET_64BIT.
(bytepick_w_): Add mode for lshiftrt and ashift.
(bitsize, bytepick_imm, bytepick_w_ashift_amount): New
define_mode_attr.
(bytepick_w__extend): Adjust for the RTL change
caused by 32-bit shift expanding.  Now bytepick_imm only covers
2 and 3, separate one remaining case to ...
(bytepick_w_1_extend): ... here, new define_insn.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/bitwise_extend.c: New test.
---

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?

  gcc/config/loongarch/loongarch.md | 131 +++---
  .../gcc.target/loongarch/bitwise_extend.c |  45 ++
  2 files changed, 154 insertions(+), 22 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/bitwise_extend.c

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index bc09712bce7..e1629c5a339 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -546,6 +546,7 @@ (define_code_attr u_bool [(sign_extend "false") (zero_extend 
"true")])
  (define_code_attr optab [(ashift "ashl")
 (ashiftrt "ashr")
 (lshiftrt "lshr")
+(rotatert "rotr")
 (ior "ior")
 (xor "xor")
 (and "and")
@@ -624,6 +625,49 @@ (define_int_attr bytepick_imm [(8 "1")
 (48 "6")
 (56 "7")])
  
+;; Expand some 32-bit operations to si3_extend operations if TARGET_64BIT

+;; so the redundant sign extension can be removed if the output is used as
+;; an input of a bitwise operation.  Note plus, rotl, and div are handled
+;; separately.
+(define_code_iterator shift_w [any_shift rotatert])
+(define_code_iterator arith_w [minus mult])
+
+(define_expand "3"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (shift_w:GPR (match_operand:GPR 1 "register_operand" "r")
+(match_operand:SI 2 "arith_operand" "rI")))]
+  ""
+{
+  if (TARGET_64BIT && mode == SImode)
+{
+  rtx t = gen_reg_rtx (DImode);
+  emit_insn (gen_si3_extend (t, operands[1], operands[2]));
+  t = gen_lowpart (SImode, t);
+  SUBREG_PROMOTED_VAR_P (t) = 1;
+  SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+  emit_move_insn (operands[0], t);
+  DONE;
+}
+})
+
+(define_expand "3"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (arith_w:GPR (match_operand:GPR 1 "register_operand" "r")
+(match_operand:GPR 2 "register_operand" "r")))]
+  ""
+{
+  if (TARGET_64BIT && mode == SImode)
+{
+  rtx t = gen_reg_rtx (DImode);
+  emit_insn (gen_si3_extend (t, operands[1], operands[2]));
+  t = gen_lowpart (SImode, t);
+  SUBREG_PROMOTED_VAR_P (t) = 1;
+  SUBREG_PROMOTED_SET (t, 

Re: [PATCH] LoongArch: Use iorn and andn standard pattern names.

2024-07-28 Thread Lulu Cheng



在 2024/7/28 上午3:30, Andrew Pinski 写道:

On Sat, Jul 27, 2024 at 1:55 AM Lulu Cheng wrote:

gcc/ChangeLog:

 * config/loongarch/lasx.md (xvandn<mode>3): Rename to ...
 (andn<mode>3): This.
 (xvorn<mode>3): Rename to ...
 (iorn<mode>3): This.
 * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vandn_v):
 Defined as the modified name.
 (CODE_FOR_lsx_vorn_v): Likewise.
 (CODE_FOR_lasx_xvandn_v): Likewise.
 (CODE_FOR_lasx_xvorn_v): Likewise.
 * config/loongarch/loongarch.md (<optab>n<mode>): Rename to ...
 (<optab>n<mode>3): This.
 * config/loongarch/lsx.md (vandn<mode>3): Rename to ...
 (andn<mode>3): This.
 (vorn<mode>3): Rename to ...
 (iorn<mode>3): This.


You might want to add a testcase like I did for aarch64:
https://gcc.gnu.org/pipermail/gcc-patches/2024-July/658219.html .

Thanks,
Andrew Pinski


Ok, I'll add a test case for this modification.

Thanks.




---
  gcc/config/loongarch/lasx.md   | 4 ++--
  gcc/config/loongarch/loongarch-builtins.cc | 8 
  gcc/config/loongarch/loongarch.md  | 2 +-
  gcc/config/loongarch/lsx.md| 4 ++--
  4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 7bd61f8ed5b..c5fe04de86c 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -2716,7 +2716,7 @@ (define_insn "lasx_vext2xv_d_b"
 (set_attr "mode" "V4DI")])

  ;; Extend loongson-sx to loongson-asx.
-(define_insn "xvandn3"
+(define_insn "andn3"
[(set (match_operand:LASX 0 "register_operand" "=f")
 (and:LASX (not:LASX (match_operand:LASX 1 "register_operand" "f"))
 (match_operand:LASX 2 "register_operand" "f")))]
@@ -4637,7 +4637,7 @@ (define_insn "lasx_xvssrlrn__"
[(set_attr "type" "simd_int_arith")
 (set_attr "mode" "")])

-(define_insn "xvorn3"
+(define_insn "iorn3"
[(set (match_operand:ILASX 0 "register_operand" "=f")
 (ior:ILASX (not:ILASX (match_operand:ILASX 2 "register_operand" "f"))
(match_operand:ILASX 1 "register_operand" "f")))]
diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc
index fbe46833c9b..f0de80d767b 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -458,8 +458,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
  #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du
  #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s
  #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d
-#define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3
-#define CODE_FOR_lsx_vorn_v CODE_FOR_vornv16qi3
+#define CODE_FOR_lsx_vandn_v CODE_FOR_andnv16qi3
+#define CODE_FOR_lsx_vorn_v CODE_FOR_iornv16qi3
  #define CODE_FOR_lsx_vneg_b CODE_FOR_vnegv16qi2
  #define CODE_FOR_lsx_vneg_h CODE_FOR_vnegv8hi2
  #define CODE_FOR_lsx_vneg_w CODE_FOR_vnegv4si2
@@ -692,8 +692,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
  #define CODE_FOR_lasx_xvrepli_w CODE_FOR_lasx_xvrepliv8si
  #define CODE_FOR_lasx_xvrepli_d CODE_FOR_lasx_xvrepliv4di

-#define CODE_FOR_lasx_xvandn_v CODE_FOR_xvandnv32qi3
-#define CODE_FOR_lasx_xvorn_v CODE_FOR_xvornv32qi3
+#define CODE_FOR_lasx_xvandn_v CODE_FOR_andnv32qi3
+#define CODE_FOR_lasx_xvorn_v CODE_FOR_iornv32qi3
  #define CODE_FOR_lasx_xvneg_b CODE_FOR_negv32qi2
  #define CODE_FOR_lasx_xvneg_h CODE_FOR_negv16hi2
  #define CODE_FOR_lasx_xvneg_w CODE_FOR_negv8si2
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 459ad30b9bb..4e4ddd515c9 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1668,7 +1668,7 @@ (define_insn "*norsi3_internal"
[(set_attr "type" "logical")
 (set_attr "mode" "SI")])

-(define_insn "n"
+(define_insn "n3"
[(set (match_operand:X 0 "register_operand" "=r")
 (neg_bitwise:X
 (not:X (match_operand:X 1 "register_operand" "r"))
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index 454cda47876..dcb667a6ce5 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -2344,7 +2344,7 @@ (define_insn_and_split "vec_concatv4sf"
  }
[(set_attr "mode" "V4SF")])

-(define_insn "vandn3"
+(define_insn "andn3"
[(set (match_operand:LSX 0 "register_operand" "=f")
 (and:LSX (not:LSX (match_operand:LSX 1 "register_operand" "f"))
  (match_operand:LSX 2 "register_operand" "f")))]
@@ -3028,7 +3028,7 @@ (define_insn "lsx_vssrlrn__"
[(set_attr "type" "simd_int_arith")
 (set_attr "mode" "")])

-(define_insn "vorn3"
+(define_insn "iorn3"
[(set (match_operand:ILSX 0 "register_operand" "=f")
 (ior:ILSX (not:ILSX (match_operand:ILSX 2 "register_operand" "f"))
   (match_operand:ILSX 1 "register_operand" "f")))]
--
2.39.3





[PATCH] LoongArch: Use iorn and andn standard pattern names.

2024-07-27 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/lasx.md (xvandn<mode>3): Rename to ...
(andn<mode>3): This.
(xvorn<mode>3): Rename to ...
(iorn<mode>3): This.
* config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vandn_v):
Defined as the modified name.
(CODE_FOR_lsx_vorn_v): Likewise.
(CODE_FOR_lasx_xvandn_v): Likewise.
(CODE_FOR_lasx_xvorn_v): Likewise.
* config/loongarch/loongarch.md (<optab>n<mode>): Rename to ...
(<optab>n<mode>3): This.
* config/loongarch/lsx.md (vandn<mode>3): Rename to ...
(andn<mode>3): This.
(vorn<mode>3): Rename to ...
(iorn<mode>3): This.
---
 gcc/config/loongarch/lasx.md   | 4 ++--
 gcc/config/loongarch/loongarch-builtins.cc | 8 
 gcc/config/loongarch/loongarch.md  | 2 +-
 gcc/config/loongarch/lsx.md| 4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 7bd61f8ed5b..c5fe04de86c 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -2716,7 +2716,7 @@ (define_insn "lasx_vext2xv_d_b"
(set_attr "mode" "V4DI")])
 
 ;; Extend loongson-sx to loongson-asx.
-(define_insn "xvandn3"
+(define_insn "andn3"
   [(set (match_operand:LASX 0 "register_operand" "=f")
(and:LASX (not:LASX (match_operand:LASX 1 "register_operand" "f"))
(match_operand:LASX 2 "register_operand" "f")))]
@@ -4637,7 +4637,7 @@ (define_insn "lasx_xvssrlrn__"
   [(set_attr "type" "simd_int_arith")
(set_attr "mode" "")])
 
-(define_insn "xvorn3"
+(define_insn "iorn3"
   [(set (match_operand:ILASX 0 "register_operand" "=f")
(ior:ILASX (not:ILASX (match_operand:ILASX 2 "register_operand" "f"))
   (match_operand:ILASX 1 "register_operand" "f")))]
diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc
index fbe46833c9b..f0de80d767b 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -458,8 +458,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
 #define CODE_FOR_lsx_vabsd_du CODE_FOR_lsx_vabsd_u_du
 #define CODE_FOR_lsx_vftint_wu_s CODE_FOR_lsx_vftint_u_wu_s
 #define CODE_FOR_lsx_vftint_lu_d CODE_FOR_lsx_vftint_u_lu_d
-#define CODE_FOR_lsx_vandn_v CODE_FOR_vandnv16qi3
-#define CODE_FOR_lsx_vorn_v CODE_FOR_vornv16qi3
+#define CODE_FOR_lsx_vandn_v CODE_FOR_andnv16qi3
+#define CODE_FOR_lsx_vorn_v CODE_FOR_iornv16qi3
 #define CODE_FOR_lsx_vneg_b CODE_FOR_vnegv16qi2
 #define CODE_FOR_lsx_vneg_h CODE_FOR_vnegv8hi2
 #define CODE_FOR_lsx_vneg_w CODE_FOR_vnegv4si2
@@ -692,8 +692,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
 #define CODE_FOR_lasx_xvrepli_w CODE_FOR_lasx_xvrepliv8si
 #define CODE_FOR_lasx_xvrepli_d CODE_FOR_lasx_xvrepliv4di
 
-#define CODE_FOR_lasx_xvandn_v CODE_FOR_xvandnv32qi3
-#define CODE_FOR_lasx_xvorn_v CODE_FOR_xvornv32qi3
+#define CODE_FOR_lasx_xvandn_v CODE_FOR_andnv32qi3
+#define CODE_FOR_lasx_xvorn_v CODE_FOR_iornv32qi3
 #define CODE_FOR_lasx_xvneg_b CODE_FOR_negv32qi2
 #define CODE_FOR_lasx_xvneg_h CODE_FOR_negv16hi2
 #define CODE_FOR_lasx_xvneg_w CODE_FOR_negv8si2
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 459ad30b9bb..4e4ddd515c9 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1668,7 +1668,7 @@ (define_insn "*norsi3_internal"
   [(set_attr "type" "logical")
(set_attr "mode" "SI")])
 
-(define_insn "n"
+(define_insn "n3"
   [(set (match_operand:X 0 "register_operand" "=r")
(neg_bitwise:X
(not:X (match_operand:X 1 "register_operand" "r"))
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index 454cda47876..dcb667a6ce5 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -2344,7 +2344,7 @@ (define_insn_and_split "vec_concatv4sf"
 }
   [(set_attr "mode" "V4SF")])
 
-(define_insn "vandn3"
+(define_insn "andn3"
   [(set (match_operand:LSX 0 "register_operand" "=f")
(and:LSX (not:LSX (match_operand:LSX 1 "register_operand" "f"))
 (match_operand:LSX 2 "register_operand" "f")))]
@@ -3028,7 +3028,7 @@ (define_insn "lsx_vssrlrn__"
   [(set_attr "type" "simd_int_arith")
(set_attr "mode" "")])
 
-(define_insn "vorn3"
+(define_insn "iorn3"
   [(set (match_operand:ILSX 0 "register_operand" "=f")
(ior:ILSX (not:ILSX (match_operand:ILSX 2 "register_operand" "f"))
  (match_operand:ILSX 1 "register_operand" "f")))]
-- 
2.39.3



[PATCH] LoongArch: Use iorn and andn standard pattern names for scalar modes.

2024-07-27 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.md (n): Rename to ...
(n3): This.
---
 gcc/config/loongarch/loongarch.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 459ad30b9bb..4e4ddd515c9 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1668,7 +1668,7 @@ (define_insn "*norsi3_internal"
   [(set_attr "type" "logical")
(set_attr "mode" "SI")])
 
-(define_insn "n"
+(define_insn "n3"
   [(set (match_operand:X 0 "register_operand" "=r")
(neg_bitwise:X
(not:X (match_operand:X 1 "register_operand" "r"))
-- 
2.39.3



Re: [PATCH] LoongArch: Use iorn and andn standard pattern names for scalar modes.

2024-07-27 Thread Lulu Cheng



在 2024/7/27 下午4:41, Xi Ruoyao 写道:

On Sat, 2024-07-27 at 16:36 +0800, Lulu Cheng wrote:

gcc/ChangeLog:

* config/loongarch/loongarch.md (n): Rename to ...
(n3): This.

Ok.

Note that [x]vorn3 and [x]vandn3 should be renamed as well.


Oh, I simply forgot about them; I am modifying the vector patterns now.

Thanks!




---
  gcc/config/loongarch/loongarch.md | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 459ad30b9bb..4e4ddd515c9 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1668,7 +1668,7 @@ (define_insn "*norsi3_internal"
    [(set_attr "type" "logical")
     (set_attr "mode" "SI")])
  
-(define_insn "n"

+(define_insn "n3"
    [(set (match_operand:X 0 "register_operand" "=r")
    (neg_bitwise:X
        (not:X (match_operand:X 1 "register_operand" "r"))




[PATCH] LoongArch: Use iorn and andn standard pattern names for scalar modes.

2024-07-27 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.md (n): Rename to ...
(n3): This.
---
 gcc/config/loongarch/loongarch.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 459ad30b9bb..4e4ddd515c9 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1668,7 +1668,7 @@ (define_insn "*norsi3_internal"
   [(set_attr "type" "logical")
(set_attr "mode" "SI")])
 
-(define_insn "n"
+(define_insn "n3"
   [(set (match_operand:X 0 "register_operand" "=r")
(neg_bitwise:X
(not:X (match_operand:X 1 "register_operand" "r"))
-- 
2.39.3



Re:[pushed] [PATCH] LoongArch: Organize the code related to split move and merge the same functions.

2024-07-19 Thread Lulu Cheng

Pushed to r15-2167.

在 2024/7/13 下午5:04, Lulu Cheng 写道:

gcc/ChangeLog:

* config/loongarch/loongarch-protos.h
(loongarch_split_128bit_move): Delete.
(loongarch_split_128bit_move_p): Delete.
(loongarch_split_256bit_move): Delete.
(loongarch_split_256bit_move_p): Delete.
(loongarch_split_vector_move): Add a function declaration.
* config/loongarch/loongarch.cc
(loongarch_vector_costs::finish_cost): Adjust the code
formatting.
(loongarch_split_vector_move_p): Merge
loongarch_split_128bit_move_p and loongarch_split_256bit_move_p.
(loongarch_split_move_p): Merge code.
(loongarch_split_move): Likewise.
(loongarch_split_128bit_move_p): Delete.
(loongarch_split_256bit_move_p): Delete.
(loongarch_split_128bit_move): Delete.
(loongarch_split_vector_move): Merge loongarch_split_128bit_move
and loongarch_split_256bit_move.
(loongarch_split_256bit_move): Delete.
(loongarch_global_init): Remove the extra semicolon at the
end of the function.
* config/loongarch/loongarch.md (*movdf_softfloat):  Added a new
condition TARGET_64BIT.
---
  gcc/config/loongarch/loongarch-protos.h |   5 +-
  gcc/config/loongarch/loongarch.cc   | 221 ++--
  gcc/config/loongarch/loongarch.md   |   1 +
  3 files changed, 58 insertions(+), 169 deletions(-)

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index e238d795a73..85f6e894399 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx);
  extern void loongarch_split_move (rtx, rtx);
  extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
  extern void loongarch_split_plus_constant (rtx *, machine_mode);
-extern void loongarch_split_128bit_move (rtx, rtx);
-extern bool loongarch_split_128bit_move_p (rtx, rtx);
-extern void loongarch_split_256bit_move (rtx, rtx);
-extern bool loongarch_split_256bit_move_p (rtx, rtx);
+extern void loongarch_split_vector_move (rtx, rtx);
  extern const char *loongarch_output_move (rtx, rtx);
  #ifdef RTX_CODE
  extern void loongarch_expand_scc (rtx *);
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 8eb47ff95c3..c7a02103ef5 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4354,10 +4354,10 @@ void
  loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
  {
loop_vec_info loop_vinfo = dyn_cast (m_vinfo);
+
if (loop_vinfo)
-{
-  m_suggested_unroll_factor = determine_suggested_unroll_factor 
(loop_vinfo);
-}
+m_suggested_unroll_factor
+  = determine_suggested_unroll_factor (loop_vinfo);
  
vector_costs::finish_cost (scalar_costs);

  }
@@ -4423,6 +4423,7 @@ loongarch_subword (rtx op, bool high_p)
return simplify_gen_subreg (word_mode, op, mode, byte);
  }
  
+static bool loongarch_split_vector_move_p (rtx dest, rtx src);

  /* Return true if a move from SRC to DEST should be split into two.
 SPLIT_TYPE describes the split condition.  */
  
@@ -4444,13 +4445,11 @@ loongarch_split_move_p (rtx dest, rtx src)

return false;
  }
  
-  /* Check if LSX moves need splitting.  */

-  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
-return loongarch_split_128bit_move_p (dest, src);
  
-  /* Check if LASX moves need splitting.  */

-  if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
-return loongarch_split_256bit_move_p (dest, src);
+  /* Check if vector moves need splitting.  */
+  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
+  || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+return loongarch_split_vector_move_p (dest, src);
  
/* Otherwise split all multiword moves.  */

return size > UNITS_PER_WORD;
@@ -4463,10 +4462,9 @@ void
  loongarch_split_move (rtx dest, rtx src)
  {
gcc_checking_assert (loongarch_split_move_p (dest, src));
-  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
-loongarch_split_128bit_move (dest, src);
-  else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
-loongarch_split_256bit_move (dest, src);
+  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
+  || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+loongarch_split_vector_move (dest, src);
else
  gcc_unreachable ();
  }
@@ -4588,224 +4586,117 @@ loongarch_output_move_index_float (rtx x, 
machine_mode mode, bool ldr)
  
return insn[ldr][index-2];

  }
-/* Return true if a 128-bit move from SRC to DEST should be split.  */
-
-bool
-loongarch_split_128bit_move_p (rtx dest, rtx src)
-{
-  /* LSX-to-LSX moves can be done in a single instruction.  */
-  if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
-return false;
-
-  /* Check for LSX loads and stores.  */
-  if (FP_REG_RTX_P (dest) && MEM_P (src))
-return false;
-  if

Re: [PATCH] LoongArch: Implement scalar isinf, isnormal, and isfinite via fclass

2024-07-15 Thread Lulu Cheng



在 2024/7/11 下午7:45, Xi Ruoyao 写道:

Doing so can avoid loading FP constants from the memory.  It also
partially fixes PR 66462 as fclass does not signal on sNaN.

gcc/ChangeLog:

* config/loongarch/loongarch.md (extendsidi2): Add ("=r", "f")
alternative and use movfr2gr.s for it.  The spec clearly states
movfr2gr.s sign extends the value to GRLEN.
(fclass_): Make the result SImode instead of a floating
mode.  The fclass results are really not FP values.
(FCLASS_MASK): New define_int_iterator.
(fclass_optab): New define_int_attr.
(<FCLASS_MASK:fclass_optab><ANYF:mode>2): New define_expand
template.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/fclass-compile.c: New test.
* gcc.target/loongarch/fclass-run.c: New test.
---

Bootstrapped and regtested on loongarch64-linux-gnu.  There are two
regressions: range-sincos.c and vrp-float-abs-1.c but they shall be
fixed by
https://gcc.gnu.org/pipermail/gcc-patches/2024-July/656937.html.

There is a redundant "andi" in the code generation for the test case:
https://gcc.gnu.org/pipermail/gcc-patches/2024-July/656764.html.

I suppose the fix for this redundant "andi" is to use word_mode instead
of SImode for operand 0, but that does not work as of now:
https://gcc.gnu.org/pipermail/gcc-patches/2024-July/656772.html.

Ok for trunk (now, or after the fix for range-sincos.c and
vrp-float-abs-1.c is committed)?  IMO the redundant "andi" can be fixed
later.


Hi,

g++.dg/opt/pr107569.C, range-sincos.c and vrp-float-abs-1.c are all the
same issue, right?

I have no objection to the code modifications, but I think it's better
to wait until this builtin function is fixed.

Thanks!



  gcc/config/loongarch/loongarch.md | 53 ---
  .../gcc.target/loongarch/fclass-compile.c | 20 +++
  .../gcc.target/loongarch/fclass-run.c | 53 +++
  3 files changed, 119 insertions(+), 7 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-compile.c
  create mode 100644 gcc/testsuite/gcc.target/loongarch/fclass-run.c

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index e4434c3bd4e..b3cae49832e 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1829,16 +1829,17 @@ (define_insn "*zero_extendhi_truncqi"
  ;;  
  
  (define_insn "extendsidi2"

-  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r")
(sign_extend:DI
-   (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k")))]
+   (match_operand:SI 1 "nonimmediate_operand" "r,ZC,m,k,f")))]
"TARGET_64BIT"
"@
 slli.w\t%0,%1,0
 ldptr.w\t%0,%1
 ld.w\t%0,%1
-   ldx.w\t%0,%1"
-  [(set_attr "move_type" "sll0,load,load,load")
+   ldx.w\t%0,%1
+   movfr2gr.s\t%0,%1"
+  [(set_attr "move_type" "sll0,load,load,load,mftg")
 (set_attr "mode" "DI")])
  
  (define_insn "extend2"

@@ -4162,14 +4163,52 @@ (define_insn "loongarch_movgr2fcsr"
"movgr2fcsr\t$r%0,%1")
  
  (define_insn "fclass_"

-  [(set (match_operand:ANYF 0 "register_operand" "=f")
-   (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
- UNSPEC_FCLASS))]
+  [(set (match_operand:SI 0 "register_operand" "=f")
+   (unspec:SI [(match_operand:ANYF 1 "register_operand" "f")]
+  UNSPEC_FCLASS))]
"TARGET_HARD_FLOAT"
"fclass.\t%0,%1"
[(set_attr "type" "unknown")
 (set_attr "mode" "")])
  
+(define_int_iterator FCLASS_MASK [68 136 952])

+(define_int_attr fclass_optab
+  [(68 "isinf")
+   (136"isnormal")
+   (952"isfinite")])
+
+(define_expand "2"
+  [(match_operand:SI   0 "register_operand" "=r")
+   (match_operand:ANYF 1 "register_operand" " f")
+   (const_int FCLASS_MASK)]
+  "TARGET_HARD_FLOAT"
+  {
+rtx ft0 = gen_reg_rtx (SImode);
+rtx t0 = gen_reg_rtx (word_mode);
+rtx mask = GEN_INT ();
+
+emit_insn (gen_fclass_ (ft0, operands[1]));
+
+if (TARGET_64BIT)
+  emit_insn (gen_extend_insn (t0, ft0, DImode, SImode, 0));
+else
+  emit_move_insn (t0, ft0);
+
+emit_move_insn (t0, gen_rtx_AND (word_mode, t0, mask));
+emit_move_insn (t0, gen_rtx_NE (word_mode, t0, const0_rtx));
+
+if (TARGET_64BIT)
+  {
+   t0 = lowpart_subreg (SImode, t0, DImode);
+   SUBREG_PROMOTED_VAR_P (t0) = 1;
+   SUBREG_PROMOTED_SET (t0, SRP_SIGNED);
+  }
+
+emit_move_insn (operands[0], t0);
+
+DONE;
+  })
+
  (define_insn "bytepick_w_"
[(set (match_operand:SI 0 "register_operand" "=r")
(ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r")
diff --git a/gcc/testsuite/gcc.target/loongarch/fclass-compile.c 
b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c
new file mode 100644
index 000..9c24d6e263c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/fclass-compile.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { 

[PATCH] LoongArch: Organize the code related to split move and merge the same functions.

2024-07-13 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch-protos.h
(loongarch_split_128bit_move): Delete.
(loongarch_split_128bit_move_p): Delete.
(loongarch_split_256bit_move): Delete.
(loongarch_split_256bit_move_p): Delete.
(loongarch_split_vector_move): Add a function declaration.
* config/loongarch/loongarch.cc
(loongarch_vector_costs::finish_cost): Adjust the code
formatting.
(loongarch_split_vector_move_p): Merge
loongarch_split_128bit_move_p and loongarch_split_256bit_move_p.
(loongarch_split_move_p): Merge code.
(loongarch_split_move): Likewise.
(loongarch_split_128bit_move_p): Delete.
(loongarch_split_256bit_move_p): Delete.
(loongarch_split_128bit_move): Delete.
(loongarch_split_vector_move): Merge loongarch_split_128bit_move
and loongarch_split_256bit_move.
(loongarch_split_256bit_move): Delete.
(loongarch_global_init): Remove the extra semicolon at the
end of the function.
* config/loongarch/loongarch.md (*movdf_softfloat):  Added a new
condition TARGET_64BIT.
---
 gcc/config/loongarch/loongarch-protos.h |   5 +-
 gcc/config/loongarch/loongarch.cc   | 221 ++--
 gcc/config/loongarch/loongarch.md   |   1 +
 3 files changed, 58 insertions(+), 169 deletions(-)

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index e238d795a73..85f6e894399 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx);
 extern void loongarch_split_move (rtx, rtx);
 extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
 extern void loongarch_split_plus_constant (rtx *, machine_mode);
-extern void loongarch_split_128bit_move (rtx, rtx);
-extern bool loongarch_split_128bit_move_p (rtx, rtx);
-extern void loongarch_split_256bit_move (rtx, rtx);
-extern bool loongarch_split_256bit_move_p (rtx, rtx);
+extern void loongarch_split_vector_move (rtx, rtx);
 extern const char *loongarch_output_move (rtx, rtx);
 #ifdef RTX_CODE
 extern void loongarch_expand_scc (rtx *);
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 8eb47ff95c3..c7a02103ef5 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4354,10 +4354,10 @@ void
 loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
 {
   loop_vec_info loop_vinfo = dyn_cast (m_vinfo);
+
   if (loop_vinfo)
-{
-  m_suggested_unroll_factor = determine_suggested_unroll_factor 
(loop_vinfo);
-}
+m_suggested_unroll_factor
+  = determine_suggested_unroll_factor (loop_vinfo);
 
   vector_costs::finish_cost (scalar_costs);
 }
@@ -4423,6 +4423,7 @@ loongarch_subword (rtx op, bool high_p)
   return simplify_gen_subreg (word_mode, op, mode, byte);
 }
 
+static bool loongarch_split_vector_move_p (rtx dest, rtx src);
 /* Return true if a move from SRC to DEST should be split into two.
SPLIT_TYPE describes the split condition.  */
 
@@ -4444,13 +4445,11 @@ loongarch_split_move_p (rtx dest, rtx src)
return false;
 }
 
-  /* Check if LSX moves need splitting.  */
-  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
-return loongarch_split_128bit_move_p (dest, src);
 
-  /* Check if LASX moves need splitting.  */
-  if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
-return loongarch_split_256bit_move_p (dest, src);
+  /* Check if vector moves need splitting.  */
+  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
+  || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+return loongarch_split_vector_move_p (dest, src);
 
   /* Otherwise split all multiword moves.  */
   return size > UNITS_PER_WORD;
@@ -4463,10 +4462,9 @@ void
 loongarch_split_move (rtx dest, rtx src)
 {
   gcc_checking_assert (loongarch_split_move_p (dest, src));
-  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
-loongarch_split_128bit_move (dest, src);
-  else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
-loongarch_split_256bit_move (dest, src);
+  if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
+  || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
+loongarch_split_vector_move (dest, src);
   else
 gcc_unreachable ();
 }
@@ -4588,224 +4586,117 @@ loongarch_output_move_index_float (rtx x, 
machine_mode mode, bool ldr)
 
   return insn[ldr][index-2];
 }
-/* Return true if a 128-bit move from SRC to DEST should be split.  */
-
-bool
-loongarch_split_128bit_move_p (rtx dest, rtx src)
-{
-  /* LSX-to-LSX moves can be done in a single instruction.  */
-  if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
-return false;
-
-  /* Check for LSX loads and stores.  */
-  if (FP_REG_RTX_P (dest) && MEM_P (src))
-return false;
-  if (FP_REG_RTX_P (src) && MEM_P (dest))
-return false;
-
-  /* Check for LSX set to an immediate const vector with valid 

Re:[pushed] [PATCH 2/2] LoongArch: Remove unreachable codes.

2024-07-11 Thread Lulu Cheng

Pushed to r15-1987.

在 2024/7/4 下午5:56, Lulu Cheng 写道:

gcc/ChangeLog:

* config/loongarch/loongarch.cc
(loongarch_split_move): Delete.
(loongarch_hard_regno_mode_ok_uncached): Likewise.
* config/loongarch/loongarch.md
(move_doubleword_fpr): Likewise.
(load_low): Likewise.
(load_high): Likewise.
(store_word): Likewise.
(movgr2frh): Likewise.
(movfrh2gr): Likewise.
---
  gcc/config/loongarch/loongarch.cc |  47 +++--
  gcc/config/loongarch/loongarch.md | 109 --
  2 files changed, 8 insertions(+), 148 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 803ed0575bd..ebd418ab115 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4382,42 +4382,13 @@ loongarch_split_move_p (rtx dest, rtx src)
  void
  loongarch_split_move (rtx dest, rtx src)
  {
-  rtx low_dest;
-
gcc_checking_assert (loongarch_split_move_p (dest, src));
if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
  loongarch_split_128bit_move (dest, src);
else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
  loongarch_split_256bit_move (dest, src);
-  else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
-{
-  if (!TARGET_64BIT && GET_MODE (dest) == DImode)
-   emit_insn (gen_move_doubleword_fprdi (dest, src));
-  else if (!TARGET_64BIT && GET_MODE (dest) == DFmode)
-   emit_insn (gen_move_doubleword_fprdf (dest, src));
-  else if (TARGET_64BIT && GET_MODE (dest) == TFmode)
-   emit_insn (gen_move_doubleword_fprtf (dest, src));
-  else
-   gcc_unreachable ();
-}
else
-{
-  /* The operation can be split into two normal moves.  Decide in
-which order to do them.  */
-  low_dest = loongarch_subword (dest, false);
-  if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
-   {
- loongarch_emit_move (loongarch_subword (dest, true),
-  loongarch_subword (src, true));
- loongarch_emit_move (low_dest, loongarch_subword (src, false));
-   }
-  else
-   {
- loongarch_emit_move (low_dest, loongarch_subword (src, false));
- loongarch_emit_move (loongarch_subword (dest, true),
-  loongarch_subword (src, true));
-   }
-}
+gcc_unreachable ();
  }
  
  /* Check if adding an integer constant value for a specific mode can be

@@ -6688,20 +6659,18 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int 
regno, machine_mode mode)
size = GET_MODE_SIZE (mode);
mclass = GET_MODE_CLASS (mode);
  
-  if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode)

+  if (GP_REG_P (regno)
+  && !LSX_SUPPORTED_MODE_P (mode)
&& !LASX_SUPPORTED_MODE_P (mode))
  return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD;
  
-  /* For LSX, allow TImode and 128-bit vector modes in all FPR.  */

-  if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode))
-return true;
-
-  /* FIXED ME: For LASX, allow TImode and 256-bit vector modes in all FPR.  */
-  if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode))
-return true;
-
if (FP_REG_P (regno))
  {
+  /* Allow 128-bit or 256-bit vector modes in all FPR.  */
+  if (LSX_SUPPORTED_MODE_P (mode)
+ || LASX_SUPPORTED_MODE_P (mode))
+   return true;
+
if (mclass == MODE_FLOAT
  || mclass == MODE_COMPLEX_FLOAT
  || mclass == MODE_VECTOR_FLOAT)
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 25c1d323ba0..21890a2d94b 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -400,9 +400,6 @@ (define_mode_iterator X [(SI "!TARGET_64BIT") (DI 
"TARGET_64BIT")])
  ;; 64-bit modes for which we provide move patterns.
  (define_mode_iterator MOVE64 [DI DF])
  
-;; 128-bit modes for which we provide move patterns on 64-bit targets.

-(define_mode_iterator MOVE128 [TI TF])
-
  ;; Iterator for sub-32-bit integer modes.
  (define_mode_iterator SHORT [QI HI])
  
@@ -421,12 +418,6 @@ (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")

  (define_mode_iterator ANYFI [(SI "TARGET_HARD_FLOAT")
 (DI "TARGET_DOUBLE_FLOAT")])
  
-;; A mode for which moves involving FPRs may need to be split.

-(define_mode_iterator SPLITF
-  [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
-   (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
-   (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")])
-
  ;; A mode for anything with 32 bits or more, and able to be loaded with
  ;; the same addressing mode as ld.w.
  (define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF])
@@ -2421,41 +2412,6 @@ (define_insn "*movdf_softfloat"
[(se

Re:[pushed] [PATCH 1/2] LoongArch: TFmode is not allowed to be stored in the float register.

2024-07-11 Thread Lulu Cheng

Pushed to r15-1986.

在 2024/7/4 下午5:56, Lulu Cheng 写道:

PR target/115752

gcc/ChangeLog:

* config/loongarch/loongarch.cc
(loongarch_hard_regno_mode_ok_uncached): Replace
UNITS_PER_FPVALUE with UNITS_PER_HWFPVALUE.
* config/loongarch/loongarch.h (UNITS_PER_FPVALUE): Delete.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/pr115752.c: New test.
---
  gcc/config/loongarch/loongarch.cc | 2 +-
  gcc/config/loongarch/loongarch.h  | 7 ---
  gcc/testsuite/gcc.target/loongarch/pr115752.c | 8 
  3 files changed, 9 insertions(+), 8 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/pr115752.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index e2ff2af89e2..803ed0575bd 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -6705,7 +6705,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int 
regno, machine_mode mode)
if (mclass == MODE_FLOAT
  || mclass == MODE_COMPLEX_FLOAT
  || mclass == MODE_VECTOR_FLOAT)
-   return size <= UNITS_PER_FPVALUE;
+   return size <= UNITS_PER_HWFPVALUE;
  
/* Allow integer modes that fit into a single register.  We need

 to put integers into FPRs when using instructions like CVT
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index b9323aba394..5efeae53be6 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -146,13 +146,6 @@ along with GCC; see the file COPYING3.  If not see
  #define UNITS_PER_HWFPVALUE \
(TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FP_REG)
  
-/* The largest size of value that can be held in floating-point

-   registers.  */
-#define UNITS_PER_FPVALUE \
-  (TARGET_SOFT_FLOAT ? 0 \
-   : TARGET_SINGLE_FLOAT ? UNITS_PER_FP_REG \
-: LA_LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT)
-
  /* The number of bytes in a double.  */
  #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT)
  
diff --git a/gcc/testsuite/gcc.target/loongarch/pr115752.c b/gcc/testsuite/gcc.target/loongarch/pr115752.c

new file mode 100644
index 000..df4bae524f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr115752.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+
+long double
+test (long double xx)
+{
+   __asm ("" :: "f"(xx)); /* { dg-error "inconsistent operand constraints in an 
'asm'" } */
+   return xx + 1;
+}




[PATCH 2/2] LoongArch: Remove unreachable codes.

2024-07-04 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.cc
(loongarch_split_move): Delete.
(loongarch_hard_regno_mode_ok_uncached): Likewise.
* config/loongarch/loongarch.md
(move_doubleword_fpr): Likewise.
(load_low): Likewise.
(load_high): Likewise.
(store_word): Likewise.
(movgr2frh): Likewise.
(movfrh2gr): Likewise.
---
 gcc/config/loongarch/loongarch.cc |  47 +++--
 gcc/config/loongarch/loongarch.md | 109 --
 2 files changed, 8 insertions(+), 148 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 803ed0575bd..ebd418ab115 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4382,42 +4382,13 @@ loongarch_split_move_p (rtx dest, rtx src)
 void
 loongarch_split_move (rtx dest, rtx src)
 {
-  rtx low_dest;
-
   gcc_checking_assert (loongarch_split_move_p (dest, src));
   if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
 loongarch_split_128bit_move (dest, src);
   else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
 loongarch_split_256bit_move (dest, src);
-  else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
-{
-  if (!TARGET_64BIT && GET_MODE (dest) == DImode)
-   emit_insn (gen_move_doubleword_fprdi (dest, src));
-  else if (!TARGET_64BIT && GET_MODE (dest) == DFmode)
-   emit_insn (gen_move_doubleword_fprdf (dest, src));
-  else if (TARGET_64BIT && GET_MODE (dest) == TFmode)
-   emit_insn (gen_move_doubleword_fprtf (dest, src));
-  else
-   gcc_unreachable ();
-}
   else
-{
-  /* The operation can be split into two normal moves.  Decide in
-which order to do them.  */
-  low_dest = loongarch_subword (dest, false);
-  if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
-   {
- loongarch_emit_move (loongarch_subword (dest, true),
-  loongarch_subword (src, true));
- loongarch_emit_move (low_dest, loongarch_subword (src, false));
-   }
-  else
-   {
- loongarch_emit_move (low_dest, loongarch_subword (src, false));
- loongarch_emit_move (loongarch_subword (dest, true),
-  loongarch_subword (src, true));
-   }
-}
+gcc_unreachable ();
 }
 
 /* Check if adding an integer constant value for a specific mode can be
@@ -6688,20 +6659,18 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int 
regno, machine_mode mode)
   size = GET_MODE_SIZE (mode);
   mclass = GET_MODE_CLASS (mode);
 
-  if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode)
+  if (GP_REG_P (regno)
+  && !LSX_SUPPORTED_MODE_P (mode)
   && !LASX_SUPPORTED_MODE_P (mode))
 return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD;
 
-  /* For LSX, allow TImode and 128-bit vector modes in all FPR.  */
-  if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode))
-return true;
-
-  /* FIXED ME: For LASX, allow TImode and 256-bit vector modes in all FPR.  */
-  if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode))
-return true;
-
   if (FP_REG_P (regno))
 {
+  /* Allow 128-bit or 256-bit vector modes in all FPR.  */
+  if (LSX_SUPPORTED_MODE_P (mode)
+ || LASX_SUPPORTED_MODE_P (mode))
+   return true;
+
   if (mclass == MODE_FLOAT
  || mclass == MODE_COMPLEX_FLOAT
  || mclass == MODE_VECTOR_FLOAT)
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 25c1d323ba0..21890a2d94b 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -400,9 +400,6 @@ (define_mode_iterator X [(SI "!TARGET_64BIT") (DI 
"TARGET_64BIT")])
 ;; 64-bit modes for which we provide move patterns.
 (define_mode_iterator MOVE64 [DI DF])
 
-;; 128-bit modes for which we provide move patterns on 64-bit targets.
-(define_mode_iterator MOVE128 [TI TF])
-
 ;; Iterator for sub-32-bit integer modes.
 (define_mode_iterator SHORT [QI HI])
 
@@ -421,12 +418,6 @@ (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")
 (define_mode_iterator ANYFI [(SI "TARGET_HARD_FLOAT")
 (DI "TARGET_DOUBLE_FLOAT")])
 
-;; A mode for which moves involving FPRs may need to be split.
-(define_mode_iterator SPLITF
-  [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
-   (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
-   (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")])
-
 ;; A mode for anything with 32 bits or more, and able to be loaded with
 ;; the same addressing mode as ld.w.
 (define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF])
@@ -2421,41 +2412,6 @@ (define_insn "*movdf_softfloat"
   [(set_attr "move_type" "move,load,store")
(set_attr "mode" "DF")])
 
-;; Emit a doubleword move in which exactly one of the operands is
-;; a floating-point register.  We can't just emit two normal moves
-;; because of the constraints imposed by the FPU register model;
-;; see 

[PATCH 1/2] LoongArch: TFmode is not allowed to be stored in the float register.

2024-07-04 Thread Lulu Cheng
PR target/115752

gcc/ChangeLog:

* config/loongarch/loongarch.cc
(loongarch_hard_regno_mode_ok_uncached): Replace
UNITS_PER_FPVALUE with UNITS_PER_HWFPVALUE.
* config/loongarch/loongarch.h (UNITS_PER_FPVALUE): Delete.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/pr115752.c: New test.
---
 gcc/config/loongarch/loongarch.cc | 2 +-
 gcc/config/loongarch/loongarch.h  | 7 ---
 gcc/testsuite/gcc.target/loongarch/pr115752.c | 8 
 3 files changed, 9 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr115752.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index e2ff2af89e2..803ed0575bd 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -6705,7 +6705,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int 
regno, machine_mode mode)
   if (mclass == MODE_FLOAT
  || mclass == MODE_COMPLEX_FLOAT
  || mclass == MODE_VECTOR_FLOAT)
-   return size <= UNITS_PER_FPVALUE;
+   return size <= UNITS_PER_HWFPVALUE;
 
   /* Allow integer modes that fit into a single register.  We need
 to put integers into FPRs when using instructions like CVT
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index b9323aba394..5efeae53be6 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -146,13 +146,6 @@ along with GCC; see the file COPYING3.  If not see
 #define UNITS_PER_HWFPVALUE \
   (TARGET_SOFT_FLOAT ? 0 : UNITS_PER_FP_REG)
 
-/* The largest size of value that can be held in floating-point
-   registers.  */
-#define UNITS_PER_FPVALUE \
-  (TARGET_SOFT_FLOAT ? 0 \
-   : TARGET_SINGLE_FLOAT ? UNITS_PER_FP_REG \
-: LA_LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT)
-
 /* The number of bytes in a double.  */
 #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT)
 
diff --git a/gcc/testsuite/gcc.target/loongarch/pr115752.c 
b/gcc/testsuite/gcc.target/loongarch/pr115752.c
new file mode 100644
index 000..df4bae524f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr115752.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+
+long double
+test (long double xx)
+{
+   __asm ("" :: "f"(xx)); /* { dg-error "inconsistent operand constraints in 
an 'asm'" } */
+   return xx + 1;
+}
-- 
2.39.3



Re:[pushed] [PATCH 2/2] LoongArch: Define loongarch_insn_cost and set the cost of movcf2gr and movgr2cf.

2024-07-02 Thread Lulu Cheng

Modified and pushed to r15-1765.

在 2024/7/2 上午11:50, Xi Ruoyao 写道:

On Tue, 2024-07-02 at 11:22 +0800, Lulu Cheng wrote:

+static int
+loongarch_insn_cost (rtx_insn *insn, bool speed)
+{
+  rtx x = PATTERN (insn);
+  int cost = pattern_cost (x, speed);
+
+  /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr.  */
+  if (TARGET_uARCH_LA464 && GET_CODE (x) == SET
+  && GET_MODE (XEXP (x, 0)) == FCCmode)
+    {
+  rtx dest, src;
+  dest = XEXP (x, 0);
+  src = XEXP (x, 1);
+
+  if (REG_P (dest) && REG_P (src))
+   {
+     if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src)))
+       cost = COSTS_N_INSNS (7);

cost = loongarch_cost->movcf2gr;


+     else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src)))
+       cost = COSTS_N_INSNS (15);

cost = loongarch_cost->movgr2cf;

Then we don't need to check TARGET_uARCH_LA464.


+   }
+    }
+  return cost;
+}




Re: [pushed][PATCH 1/2] LoongArch: Fix explicit-relocs-{extreme-,}tls-desc.c tests.

2024-07-02 Thread Lulu Cheng

Pushed to r15-1764.

在 2024/7/2 上午11:21, Lulu Cheng 写道:

After r15-1579, ADD and LD/ST pairs will be merged into LDX/STX,
which causes these two tests to fail. To guarantee that these two tests pass,
add the compilation option '-fno-late-combine-instructions'.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c:
Add compilation options '-fno-late-combine-instructions'.
* gcc.target/loongarch/explicit-relocs-tls-desc.c: Likewise.
---
  .../gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c | 2 +-
  gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c   | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c
index 3797556e1e6..e9eb0d6f703 100644
--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc 
-mcmodel=extreme" } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc -mcmodel=extreme 
-fno-late-combine-instructions" } */
  
  __thread int a __attribute__((visibility("hidden")));

  extern __thread int b __attribute__((visibility("default")));
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c
index f6690309156..fed478458a3 100644
--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc" } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc 
-fno-late-combine-instructions" } */
  
  __thread int a __attribute__((visibility("hidden")));

  extern __thread int b __attribute__((visibility("default")));




Re: [PATCH 2/2] LoongArch: Define loongarch_insn_cost and set the cost of movcf2gr and movgr2cf.

2024-07-01 Thread Lulu Cheng



在 2024/7/2 上午11:50, Xi Ruoyao 写道:

On Tue, 2024-07-02 at 11:22 +0800, Lulu Cheng wrote:

+static int
+loongarch_insn_cost (rtx_insn *insn, bool speed)
+{
+  rtx x = PATTERN (insn);
+  int cost = pattern_cost (x, speed);
+
+  /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr.  */
+  if (TARGET_uARCH_LA464 && GET_CODE (x) == SET
+  && GET_MODE (XEXP (x, 0)) == FCCmode)
+    {
+  rtx dest, src;
+  dest = XEXP (x, 0);
+  src = XEXP (x, 1);
+
+  if (REG_P (dest) && REG_P (src))
+   {
+     if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src)))
+       cost = COSTS_N_INSNS (7);

cost = loongarch_cost->movcf2gr;


+     else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src)))
+       cost = COSTS_N_INSNS (15);

cost = loongarch_cost->movgr2cf;

Then we don't need to check TARGET_uARCH_LA464.


Ok! I'll merge it after the revisions.

Thanks.




+   }
+    }
+  return cost;
+}




[PATCH 2/2] LoongArch: Define loongarch_insn_cost and set the cost of movcf2gr and movgr2cf.

2024-07-01 Thread Lulu Cheng
The following two FAIL items have been fixed:

FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler 
movcf2fr\\t\$f[0-9]+,\$fcc
FAIL: gcc.target/loongarch/movcf2gr-via-fr.c scan-assembler 
movfr2gr.s\\t\$r4

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_insn_cost):
New function.
(TARGET_INSN_COST): New macro.
---
 gcc/config/loongarch/loongarch.cc | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 0fb547e00f4..cf21c365605 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4372,6 +4372,34 @@ loongarch_address_cost (rtx addr, machine_mode mode,
   return loongarch_address_insns (addr, mode, false);
 }
 
+/* Implement TARGET_INSN_COST.  */
+
+static int
+loongarch_insn_cost (rtx_insn *insn, bool speed)
+{
+  rtx x = PATTERN (insn);
+  int cost = pattern_cost (x, speed);
+
+  /* On LA464, prevent movcf2fr and movfr2gr from merging into movcf2gr.  */
+  if (TARGET_uARCH_LA464 && GET_CODE (x) == SET
+  && GET_MODE (XEXP (x, 0)) == FCCmode)
+{
+  rtx dest, src;
+  dest = XEXP (x, 0);
+  src = XEXP (x, 1);
+
+  if (REG_P (dest) && REG_P (src))
+   {
+ if (GP_REG_P (REGNO (dest)) && FCC_REG_P (REGNO (src)))
+   cost = COSTS_N_INSNS (7);
+ else if (FCC_REG_P (REGNO (dest)) && GP_REG_P (REGNO (src)))
+   cost = COSTS_N_INSNS (15);
+   }
+}
+  return cost;
+}
+
+
 /* Return one word of double-word value OP, taking into account the fixed
endianness of certain registers.  HIGH_P is true to select the high part,
false to select the low part.  */
@@ -11105,6 +11133,8 @@ loongarch_asm_code_end (void)
 #define TARGET_RTX_COSTS loongarch_rtx_costs
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST loongarch_address_cost
+#undef TARGET_INSN_COST
+#define TARGET_INSN_COST loongarch_insn_cost
 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
   loongarch_builtin_vectorization_cost
-- 
2.39.3



[PATCH 1/2] LoongArch: Fix explicit-relocs-{extreme-, }tls-desc.c tests.

2024-07-01 Thread Lulu Cheng
After r15-1579, ADD and LD/ST pairs will be merged into LDX/STX,
which causes these two tests to fail. To guarantee that these two tests pass,
add the compilation option '-fno-late-combine-instructions'.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c:
Add compilation options '-fno-late-combine-instructions'.
* gcc.target/loongarch/explicit-relocs-tls-desc.c: Likewise.
---
 .../gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c | 2 +-
 gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c
index 3797556e1e6..e9eb0d6f703 100644
--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-tls-desc.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc 
-mcmodel=extreme" } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc 
-mcmodel=extreme -fno-late-combine-instructions" } */
 
 __thread int a __attribute__((visibility("hidden")));
 extern __thread int b __attribute__((visibility("default")));
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c
index f6690309156..fed478458a3 100644
--- a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-tls-desc.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc" } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs -mtls-dialect=desc 
-fno-late-combine-instructions" } */
 
 __thread int a __attribute__((visibility("hidden")));
 extern __thread int b __attribute__((visibility("default")));
-- 
2.39.3



Re: Ping: [PATCH v2] LoongArch: Tweak IOR rtx_cost for bstrins

2024-06-26 Thread Lulu Cheng

LGTM!

Thanks very much!


在 2024/6/26 下午3:53, Xi Ruoyao 写道:

Ping.

On Sun, 2024-06-16 at 01:50 +0800, Xi Ruoyao wrote:

Consider

     c &= 0xfff;
     a &= ~0xfff;
     b &= ~0xfff;
     a |= c;
     b |= c;

This can be done with 2 bstrins instructions.  But we need to recognize
it in loongarch_rtx_costs or the compiler will not propagate "c & 0xfff"
forward.

gcc/ChangeLog:

* config/loongarch/loongarch.cc:
(loongarch_use_bstrins_for_ior_with_mask): Split the main logic
into ...
(loongarch_use_bstrins_for_ior_with_mask_1): ... here.
(loongarch_rtx_costs): Special-case IORs that can be implemented
with bstrins.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/bstrins-3.c: New test.
---

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?

  gcc/config/loongarch/loongarch.cc | 73 ++
-
  .../gcc.target/loongarch/bstrins-3.c  | 16 
  2 files changed, 72 insertions(+), 17 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-3.c

diff --git a/gcc/config/loongarch/loongarch.cc
b/gcc/config/loongarch/loongarch.cc
index 6ec3ee62502..256b76d044b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3681,6 +3681,27 @@ loongarch_set_reg_reg_piece_cost (machine_mode
mode, unsigned int units)
    return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
  }
  
+static int

+loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode,
+      unsigned HOST_WIDE_INT
mask1,
+      unsigned HOST_WIDE_INT
mask2)
+{
+  if (mask1 != ~mask2 || !mask1 || !mask2)
+    return 0;
+
+  /* Try to avoid a right-shift.  */
+  if (low_bitmask_len (mode, mask1) != -1)
+    return -1;
+
+  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
+    return 1;
+
+  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
+    return -1;
+
+  return 0;
+}
+
  /* Return the cost of moving between two registers of mode MODE.  */
  
  static int

@@ -3812,6 +3833,38 @@ loongarch_rtx_costs (rtx x, machine_mode mode,
int outer_code,
    /* Fall through.  */
  
  case IOR:

+  {
+   rtx op[2] = {XEXP (x, 0), XEXP (x, 1)};
+   if (GET_CODE (op[0]) == AND && GET_CODE (op[1]) == AND
+       && (mode == SImode || (TARGET_64BIT && mode == DImode)))
+     {
+       rtx rtx_mask0 = XEXP (op[0], 1), rtx_mask1 = XEXP (op[1],
1);
+       if (CONST_INT_P (rtx_mask0) && CONST_INT_P (rtx_mask1))
+     {
+   unsigned HOST_WIDE_INT mask0 = UINTVAL (rtx_mask0);
+   unsigned HOST_WIDE_INT mask1 = UINTVAL (rtx_mask1);
+   if (loongarch_use_bstrins_for_ior_with_mask_1 (mode,
+      mask0,
+   
mask1))
+     {
+       /* A bstrins instruction */
+       *total = COSTS_N_INSNS (1);
+
+       /* A srai instruction */
+       if (low_bitmask_len (mode, mask0) == -1
+   && low_bitmask_len (mode, mask1) == -1)
+     *total += COSTS_N_INSNS (1);
+
+       for (int i = 0; i < 2; i++)
+     *total += set_src_cost (XEXP (op[i], 0), mode,
speed);
+
+       return true;
+     }
+     }
+     }
+  }
+
+  /* Fall through.  */
  case XOR:
    /* Double-word operations use two single-word operations.  */
    *total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
COSTS_N_INSNS (2),
@@ -5796,23 +5849,9 @@ bool loongarch_pre_reload_split (void)
  int
  loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
  {
-  unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
-  unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
-
-  if (mask1 != ~mask2 || !mask1 || !mask2)
-    return 0;
-
-  /* Try to avoid a right-shift.  */
-  if (low_bitmask_len (mode, mask1) != -1)
-    return -1;
-
-  if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
-    return 1;
-
-  if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
-    return -1;
-
-  return 0;
+  return loongarch_use_bstrins_for_ior_with_mask_1 (mode,
+       UINTVAL (op[2]),
+       UINTVAL (op[4]));
  }
  
  /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto

diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
new file mode 100644
index 000..13762bdef42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/bstrins-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-final" } */
+/* { dg-final { scan-rtl-dump-times "insv\[sd\]i" 2 "final" } } */
+
+struct X {
+  long a, b;
+};

Re: Ping: [PATCH] LoongArch: Only transform move/move/bstrins to srai/bstrins when -Os

2024-06-26 Thread Lulu Cheng





  ;; We always avoid the shift operation in bstrins__for_ior_mask
-;; if possible, but the result may be sub-optimal when one of the
masks
+;; if possible, but the result may be larger when one of the masks
  ;; is (1 << N) - 1 and one of the src register is the dest register.
  ;; For example:
  ;; move   t0, a0
  ;; move   a0, a1
  ;; bstrins.d  a0, t0, 42, 0
  ;; ret
-;; using a shift operation would be better:
+;; using a shift operation would be smaller:
  ;; srai.d t0, a1, 43
  ;; bstrins.d  a0, t0, 63, 43
  ;; ret
  ;; unfortunately we cannot figure it out in split1: before reload we
cannot
  ;; know if the dest register is one of the src register.  Fix it up
in
  ;; peephole2.
+;;
+;; Note that the first form has a lower latency so this should only


The result of my test is that the latency of these two forms is the
same.  Is there a problem with my test?




be
+;; done when optimizing for size.
  (define_peephole2
    [(set (match_operand:GPR 0 "register_operand")
    (match_operand:GPR 1 "register_operand"))
@@ -1639,7 +1642,7 @@ (define_peephole2
      (match_operand:SI 3 "const_int_operand")
      (const_int 0))
    (match_dup 0))]
-  "peep2_reg_dead_p (3, operands[0])"
+  "peep2_reg_dead_p (3, operands[0]) && optimize_insn_for_size_p ()"
    [(const_int 0)]
    {
  int len = GET_MODE_BITSIZE (mode) - INTVAL (operands[3]);




Re: [PATCH] LoongArch: NFC: Dedup and sort the comment in loongarch_print_operand_reloc

2024-06-17 Thread Lulu Cheng

I think that's fine.

Thanks!

在 2024/6/16 下午5:11, Xi Ruoyao 写道:

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_print_operand_reloc):
Dedup and sort the comment describing modifiers.
---

It's a non-functional change thus I've not tested it.  Ok for trunk?

  gcc/config/loongarch/loongarch.cc | 10 +-
  1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 256b76d044b..dcb32a96577 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -6132,21 +6132,13 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool 
hi64_part,
 'T'Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
  'z' for (eq:?I ...), 'n' for (ne:?I ...).
 't'Like 'T', but with the EQ/NE cases reversed
-   'F' Print the FPU branch condition for comparison OP.
-   'W' Print the inverse of the FPU branch condition for comparison OP.
-   'w' Print a LSX register.
 'u'Print a LASX register.
-   'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
- 'z' for (eq:?I ...), 'n' for (ne:?I ...).
-   't' Like 'T', but with the EQ/NE cases reversed
-   'Y' Print loongarch_fp_conditions[INTVAL (OP)]
-   'Z' Print OP and a comma for 8CC, otherwise print nothing.
-   'z' Print $0 if OP is zero, otherwise print OP normally.
 'v'Print the insn size suffix b, h, w or d for vector modes V16QI, 
V8HI,
  V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively.
 'V'Print exact log2 of CONST_INT OP element 0 of a replicated
  CONST_VECTOR in decimal.
 'W'Print the inverse of the FPU branch condition for comparison OP.
+   'w' Print a LSX register.
 'X'Print CONST_INT OP in hexadecimal format.
 'x'Print the low 16 bits of CONST_INT OP in hexadecimal format.
 'Y'Print loongarch_fp_conditions[INTVAL (OP)]




Re: [PATCH] LoongArch: Use bstrins for "value & (-1u << const)"

2024-06-12 Thread Lulu Cheng

LGTM!

Thanks!

在 2024/6/9 下午9:48, Xi Ruoyao 写道:

A move/bstrins pair is as fast as a (addi.w|lu12i.w|lu32i.d|lu52i.d)/and
pair, and twice as fast as a srli/slli pair.  When the src reg and the dst
reg happen to be the same, the move instruction can be optimized away.

gcc/ChangeLog:

* config/loongarch/predicates.md (high_bitmask_operand): New
predicate.
* config/loongarch/constraints.md (Yy): New constraint.
* config/loongarch/loongarch.md (and3_align): New
define_insn_and_split.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/bstrins-1.c: New test.
* gcc.target/loongarch/bstrins-2.c: New test.
---

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?

  gcc/config/loongarch/constraints.md|  5 +
  gcc/config/loongarch/loongarch.md  | 17 +
  gcc/config/loongarch/predicates.md |  4 
  gcc/testsuite/gcc.target/loongarch/bstrins-1.c |  9 +
  gcc/testsuite/gcc.target/loongarch/bstrins-2.c | 14 ++
  5 files changed, 49 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-1.c
  create mode 100644 gcc/testsuite/gcc.target/loongarch/bstrins-2.c

diff --git a/gcc/config/loongarch/constraints.md 
b/gcc/config/loongarch/constraints.md
index f07d31650d2..12cf5e2924a 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -94,6 +94,7 @@
  ;;   "A constant @code{move_operand} that can be safely loaded using
  ;;  @code{la}."
  ;;"Yx"
+;;"Yy"
  ;; "Z" -
  ;;"ZC"
  ;;  "A memory operand whose address is formed by a base register and 
offset
@@ -291,6 +292,10 @@ (define_constraint "Yx"
 "@internal"
 (match_operand 0 "low_bitmask_operand"))
  
+(define_constraint "Yy"

+   "@internal"
+   (match_operand 0 "high_bitmask_operand"))
+
  (define_constraint "YI"
"@internal
 A replicated vector const in which the replicated value is in the range
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 5c80c169cbf..25c1d323ba0 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1542,6 +1542,23 @@ (define_insn "and3_extended"
[(set_attr "move_type" "pick_ins")
 (set_attr "mode" "")])
  
+(define_insn_and_split "and3_align"

+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (and:GPR (match_operand:GPR 1 "register_operand" "r")
+(match_operand:GPR 2 "high_bitmask_operand" "Yy")))]
+  ""
+  "#"
+  ""
+  [(set (match_dup 0) (match_dup 1))
+   (set (zero_extract:GPR (match_dup 0) (match_dup 2) (const_int 0))
+   (const_int 0))]
+{
+  int len;
+
+  len = low_bitmask_len (mode, ~INTVAL (operands[2]));
+  operands[2] = GEN_INT (len);
+})
+
  (define_insn_and_split "*bstrins__for_mask"
[(set (match_operand:GPR 0 "register_operand" "=r")
(and:GPR (match_operand:GPR 1 "register_operand" "r")
diff --git a/gcc/config/loongarch/predicates.md 
b/gcc/config/loongarch/predicates.md
index eba7f246c84..58e406ea522 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -293,6 +293,10 @@ (define_predicate "low_bitmask_operand"
(and (match_code "const_int")
 (match_test "low_bitmask_len (mode, INTVAL (op)) > 12")))
  
+(define_predicate "high_bitmask_operand"

+  (and (match_code "const_int")
+   (match_test "low_bitmask_len (mode, ~INTVAL (op)) > 0")))
+
  (define_predicate "d_operand"
(and (match_code "reg")
 (match_test "GP_REG_P (REGNO (op))")))
diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-1.c 
b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c
new file mode 100644
index 000..7cb3a952322
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/bstrins-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r4,\\\$r0,4,0" } } */
+
+long
+x (long a)
+{
+  return a & -32;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/bstrins-2.c 
b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c
new file mode 100644
index 000..9777f502e5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/bstrins-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "bstrins\\.d\t\\\$r\[0-9\]+,\\\$r0,4,0" } } */
+
+struct aligned_buffer {
+  _Alignas(32) char x[1024];
+};
+
+extern int f(char *);
+int g(void)
+{
+  struct aligned_buffer buf;
+  return f(buf.x);
+}




Re: [PATCH] LoongArch: Fix mode size comparison in loongarch_expand_conditional_move

2024-06-12 Thread Lulu Cheng



在 2024/6/12 上午11:06, Xi Ruoyao 写道:

We were comparing a mode size with word_mode, but word_mode is an enum
value thus this does not really make any sense.  (Un)luckily E_DImode
happens to be 8 so this seemed to work, but let's make it correct so it
won't blow up when we add LA32 support or add another machine mode...

gcc/ChangeLog:

* config/loongarch/loongarch.cc
(loongarch_expand_conditional_move): Compare mode size with
UNITS_PER_WORD instead of word_mode.
---

I've not fully tested this but it should be obvious.  Ok for trunk?

  gcc/config/loongarch/loongarch.cc | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index eb132f06c2e..bc3dd2b713e 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -5371,7 +5371,7 @@ loongarch_expand_conditional_move (rtx *operands)
  loongarch_emit_float_compare (, , );
else
  {
-  if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode)
+  if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD)
{
  promote_op[0] = (REG_P (op0) && REG_P (operands[2]) &&
   REGNO (op0) == REGNO (operands[2]));


This is indeed a bug and I don't think there is a problem with this change.

Thanks!



Re: [PATCH 47/52] loongarch: New hook implementation loongarch_c_mode_for_floating_type

2024-06-03 Thread Lulu Cheng

Ok! Thanks!

Lulu Cheng

在 2024/6/3 上午11:01, Kewen Lin 写道:

This is to add new port specific hook implementation
loongarch_c_mode_for_floating_type, remove macro
defines for FLOAT_TYPE_SIZE and DOUBLE_TYPE_SIZE, and
rename LONG_DOUBLE_TYPE_SIZE to LA_LONG_DOUBLE_TYPE_SIZE
as we poison LONG_DOUBLE_TYPE_SIZE but some macros need
LONG_DOUBLE_TYPE_SIZE.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_c_mode_for_floating_type):
New function.
(TARGET_C_MODE_FOR_FLOATING_TYPE): New macro.
* config/loongarch/loongarch.h (FLOAT_TYPE_SIZE): Remove.
(DOUBLE_TYPE_SIZE): Remove.
(LONG_DOUBLE_TYPE_SIZE): Rename to ...
(LA_LONG_DOUBLE_TYPE_SIZE): ... this.
(UNITS_PER_FPVALUE): Replace LONG_DOUBLE_TYPE_SIZE with
LA_LONG_DOUBLE_TYPE_SIZE.
(MAX_FIXED_MODE_SIZE): Likewise.
(STRUCTURE_SIZE_BOUNDARY): Likewise.
(BIGGEST_ALIGNMENT): Likewise.
---
  gcc/config/loongarch/loongarch.cc | 15 +++
  gcc/config/loongarch/loongarch.h  | 13 ++---
  2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index e7835ae34ae..e4a51303d22 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -10946,6 +10946,18 @@ loongarch_builtin_support_vector_misalignment 
(machine_mode mode,
  is_packed);
  }
  
+/* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return TFmode or DFmode

+   for TI_LONG_DOUBLE_TYPE which is for long double type, go with the
+   default one for the others.  */
+
+static machine_mode
+loongarch_c_mode_for_floating_type (enum tree_index ti)
+{
+  if (ti == TI_LONG_DOUBLE_TYPE)
+return TARGET_64BIT ? TFmode : DFmode;
+  return default_mode_for_floating_type (ti);
+}
+
  static bool
  use_rsqrt_p (void)
  {
@@ -11256,6 +11268,9 @@ loongarch_asm_code_end (void)
  #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
loongarch_builtin_support_vector_misalignment
  
+#undef TARGET_C_MODE_FOR_FLOATING_TYPE

+#define TARGET_C_MODE_FOR_FLOATING_TYPE loongarch_c_mode_for_floating_type
+
  struct gcc_target targetm = TARGET_INITIALIZER;
  
  #include "gt-loongarch.h"

diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index f7fe950f333..b9323aba394 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -151,7 +151,7 @@ along with GCC; see the file COPYING3.  If not see
  #define UNITS_PER_FPVALUE \
(TARGET_SOFT_FLOAT ? 0 \
 : TARGET_SINGLE_FLOAT ? UNITS_PER_FP_REG \
-: LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT)
+: LA_LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT)
  
  /* The number of bytes in a double.  */

  #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT)
@@ -162,9 +162,8 @@ along with GCC; see the file COPYING3.  If not see
  #define LONG_TYPE_SIZE (TARGET_64BIT ? 64 : 32)
  #define LONG_LONG_TYPE_SIZE 64
  
-#define FLOAT_TYPE_SIZE 32

-#define DOUBLE_TYPE_SIZE 64
-#define LONG_DOUBLE_TYPE_SIZE (TARGET_64BIT ? 128 : 64)
+/* LONG_DOUBLE_TYPE_SIZE get poisoned, so add LA_ prefix.  */
+#define LA_LONG_DOUBLE_TYPE_SIZE (TARGET_64BIT ? 128 : 64)
  
  /* Define the sizes of fixed-point types.  */

  #define SHORT_FRACT_TYPE_SIZE 8
@@ -179,7 +178,7 @@ along with GCC; see the file COPYING3.  If not see
  
  /* long double is not a fixed mode, but the idea is that, if we

 support long double, we also want a 128-bit integer type.  */
-#define MAX_FIXED_MODE_SIZE LONG_DOUBLE_TYPE_SIZE
+#define MAX_FIXED_MODE_SIZE LA_LONG_DOUBLE_TYPE_SIZE
  
  /* Width in bits of a pointer.  */

  #ifndef POINTER_SIZE
@@ -200,11 +199,11 @@ along with GCC; see the file COPYING3.  If not see
  #define STRUCTURE_SIZE_BOUNDARY 8
  
  /* There is no point aligning anything to a rounder boundary than

-   LONG_DOUBLE_TYPE_SIZE, unless under LSX/LASX the bigggest alignment is
+   LA_LONG_DOUBLE_TYPE_SIZE, unless under LSX/LASX the bigggest alignment is
 BITS_PER_LSX_REG/BITS_PER_LASX_REG/..  */
  #define BIGGEST_ALIGNMENT \
(ISA_HAS_LASX? BITS_PER_LASX_REG \
-   : (ISA_HAS_LSX ? BITS_PER_LSX_REG : LONG_DOUBLE_TYPE_SIZE))
+   : (ISA_HAS_LSX ? BITS_PER_LSX_REG : LA_LONG_DOUBLE_TYPE_SIZE))
  
  /* All accesses must be aligned.  */

  #define STRICT_ALIGNMENT (TARGET_STRICT_ALIGN)




Re: [PATCH] LoongArch: Guard REGNO with REG_P in loongarch_expand_conditional_move [PR115169]

2024-05-23 Thread Lulu Cheng

LGTM!

Thanks!

在 2024/5/22 下午7:24, Xi Ruoyao 写道:

gcc/ChangeLog:

PR target/115169
* config/loongarch/loongarch.cc
(loongarch_expand_conditional_move): Guard REGNO with REG_P.
---

Bootstrapped with --enable-checking=all.  Ok for trunk and 14?

  gcc/config/loongarch/loongarch.cc | 17 -
  1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index e7835ae34ae..1b6df6a4365 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -5344,6 +5344,7 @@ loongarch_expand_conditional_move (rtx *operands)
rtx op1_extend = op1;
  
/* Record whether operands[2] and operands[3] modes are promoted to word_mode.  */

+  bool promote_op[2] = {false, false};
bool promote_p = false;
machine_mode mode = GET_MODE (operands[0]);
  
@@ -5351,9 +5352,15 @@ loongarch_expand_conditional_move (rtx *operands)

  loongarch_emit_float_compare (, , );
else
  {
-  if ((REGNO (op0) == REGNO (operands[2])
-  || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
- && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
+  if (GET_MODE_SIZE (GET_MODE (op0)) < word_mode)
+   {
+ promote_op[0] = (REG_P (op0) && REG_P (operands[2]) &&
+  REGNO (op0) == REGNO (operands[2]));
+ promote_op[1] = (REG_P (op1) && REG_P (operands[3]) &&
+  REGNO (op1) == REGNO (operands[3]));
+   }
+
+  if (promote_op[0] || promote_op[1])
{
  mode = word_mode;
  promote_p = true;
@@ -5395,7 +5402,7 @@ loongarch_expand_conditional_move (rtx *operands)
  
if (promote_p)

{
- if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
+ if (promote_op[0])
op2 = op0_extend;
  else
{
@@ -5403,7 +5410,7 @@ loongarch_expand_conditional_move (rtx *operands)
  op2 = force_reg (mode, op2);
}
  
-	  if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))

+ if (promote_op[1])
op3 = op1_extend;
  else
{




Re: [pushed] [PATCH v4 1/2] LoongArch: Define ISA versions

2024-05-07 Thread Lulu Cheng



在 2024/5/7 下午5:42, Xi Ruoyao 写道:

On Tue, 2024-05-07 at 17:07 +0800, Xi Ruoyao wrote:

Hmm, after this change the default (-march=la64v1.0) is enabling LSX:

$ echo "int dummy;" | cc -c -v |& tail -n1
COLLECT_GCC_OPTIONS='-c' '-v' '-mabi=lp64d' '-march=la64v1.0' '-
mfpu=64'
'-msimd=lsx' '-mcmodel=normal' '-mtune=generic'

Is this expected, or is there something wrong?

Note that
https://github.com/loongson/la-toolchain-conventions?tab=readme-ov-file#configuring-the-target-isa
says:

LoongArch V1.1 features:

Enable or disable features introduced by LoongArch V1.1. The LSX / LASX
part of the LoongArch v1.1 update should only be enabled with lsx / lasx
itself enabled.

So to me -march=la64v1.0 should not imply -mlsx.



The link 
https://github.com/loongson/la-toolchain-conventions?tab=readme-ov-file#target-presets 
has a detailed description of -march.

-march=la64v1.0 enables LSX by default.





On Tue, 2024-04-23 at 11:31 +0800, Lulu Cheng wrote:

Pushed to r14-10083.

在 2024/4/23 上午10:42, Yang Yujie 写道:

These ISA versions are defined as -march= parameters and
are recommended for building binaries for distribution.

Detailed description of these definitions can be found at
https://github.com/loongson/la-toolchain-conventions, which
the LoongArch GCC port aims to conform to.

gcc/ChangeLog:

* config.gcc: Make la64v1.0 the default ISA preset of the
lp64d ABI.
* config/loongarch/genopts/loongarch-strings: Define
la64v1.0, la64v1.1.
* config/loongarch/genopts/loongarch.opt.in: Likewise.
* config/loongarch/loongarch-c.cc
(LARCH_CPP_SET_PROCESSOR): Likewise.
(loongarch_cpu_cpp_builtins): Likewise.
* config/loongarch/loongarch-cpu.cc (get_native_prid):
Likewise.
(fill_native_cpu_config): Likewise.
* config/loongarch/loongarch-def.cc (array_tune):
Likewise.
* config/loongarch/loongarch-def.h: Likewise.
* config/loongarch/loongarch-driver.cc
(driver_set_m_parm):
Likewise.
(driver_get_normalized_m_opts): Likewise.
* config/loongarch/loongarch-opts.cc
(default_tune_for_arch): Likewise.
(TUNE_FOR_ARCH): Likewise.
(arch_str): Likewise.
(loongarch_target_option_override): Likewise.
* config/loongarch/loongarch-opts.h (TARGET_uARCH_LA464):
Likewise.
(TARGET_uARCH_LA664): Likewise.
* config/loongarch/loongarch-str.h (STR_CPU_ABI_DEFAULT):
Likewise.
(STR_ARCH_ABI_DEFAULT): Likewise.
(STR_TUNE_GENERIC): Likewise.
(STR_ARCH_LA64V1_0): Likewise.
(STR_ARCH_LA64V1_1): Likewise.
* config/loongarch/loongarch.cc
(loongarch_cpu_sched_reassociation_width): Likewise.
(loongarch_asm_code_end): Likewise.
* config/loongarch/loongarch.opt: Likewise.
* doc/invoke.texi: Likewise.
---
   gcc/config.gcc    | 34 
   .../loongarch/genopts/loongarch-strings   |  5 +-
   gcc/config/loongarch/genopts/loongarch.opt.in | 43 --
   gcc/config/loongarch/loongarch-c.cc   | 37 +++--
   gcc/config/loongarch/loongarch-cpu.cc | 35 
   gcc/config/loongarch/loongarch-def.cc | 83
+--

   gcc/config/loongarch/loongarch-def.h  | 37 ++---
   gcc/config/loongarch/loongarch-driver.cc  |  8 +-
   gcc/config/loongarch/loongarch-opts.cc    | 66 +++--
--
   gcc/config/loongarch/loongarch-opts.h |  4 +-
   gcc/config/loongarch/loongarch-str.h  |  5 +-
   gcc/config/loongarch/loongarch.cc | 11 +--
   gcc/config/loongarch/loongarch.opt    | 43 --
   gcc/doc/invoke.texi   | 57 -
   14 files changed, 300 insertions(+), 168 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 5df3c52f8e9..929695c25ab 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -5072,7 +5072,7 @@ case "${target}" in
   
   		# Perform initial sanity checks on --with-*

options.
    case ${with_arch} in
-   "" | abi-default | loongarch64 | la[46]64) ;; #
OK,
append here.
+   "" | la64v1.[01] | abi-default | loongarch64 |
la[46]64) ;; # OK, append here.
    native)
    if test x${host} != x${target}; then
    echo "--with-arch=native is
illegal
for cross-compiler." 1>&2
@@ -5119,10 +5119,18 @@ case "${target}" in
   
   		# Infer ISA-related default options from the ABI:

pass 1
    case ${abi_base}/${abi_ext} in
-   lp64*/base)
+   lp64d/base)
    # architectures that support lp64* ABI
-   arch_pattern="native|abi-
default|loongarch64|la[46]64"
-   # default architecture for lp64* ABI
+   arch_pattern="native|abi-
default|la64v1.[01]|loongarch64|la[46]64"
+
+   

Re: [pushed][PATCH][gcc-13] LoongArch: Fix eh_return epilogue for normal returns.

2024-04-29 Thread Lulu Cheng

Pushed to r13-8661.

在 2024/4/29 下午4:09, Lulu Cheng 写道:

From: Yang Yujie 

On LoongArch, the registers $r4 - $r7 (EH_RETURN_DATA_REGNO) will be saved
and restored in the function prologue and epilogue if the given function calls
__builtin_eh_return.  This causes the return value to be overwritten on normal
return paths and breaks a rare case of libgcc's _Unwind_RaiseException.

gcc/ChangeLog:

PR target/114848
* config/loongarch/loongarch.cc: Do not restore the saved eh_return
data registers ($r4-$r7) for a normal return of a function that calls
__builtin_eh_return elsewhere.
* config/loongarch/loongarch-protos.h: Same.
* config/loongarch/loongarch.md: Same.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/eh_return-normal-return.c: New test.

(cherry picked from commit 4b421728289e6f1caa0dfaa953a11698ab95d37d)
---
  gcc/config/loongarch/loongarch-protos.h   |  2 +-
  gcc/config/loongarch/loongarch.cc | 35 -
  gcc/config/loongarch/loongarch.md | 23 ++-
  .../loongarch/eh_return-normal-return.c   | 38 +++
  4 files changed, 85 insertions(+), 13 deletions(-)
  create mode 100644 
gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index 35cc77c7367..0f608ee5179 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -60,7 +60,7 @@ enum loongarch_symbol_type {
  extern rtx loongarch_emit_move (rtx, rtx);
  extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int);
  extern void loongarch_expand_prologue (void);
-extern void loongarch_expand_epilogue (bool);
+extern void loongarch_expand_epilogue (int);
  extern bool loongarch_can_use_return_insn (void);
  
  extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *);
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index f47a5fc2ad7..2238858cd6a 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1012,7 +1012,8 @@ loongarch_save_restore_reg (machine_mode mode, int regno, 
HOST_WIDE_INT offset,
  
  static void

  loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
- loongarch_save_restore_fn fn)
+ loongarch_save_restore_fn fn,
+ bool skip_eh_data_regs_p)
  {
HOST_WIDE_INT offset;
  
@@ -1021,7 +1022,15 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,

for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
  if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
{
-   loongarch_save_restore_reg (word_mode, regno, offset, fn);
+   /* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO)
+  when returning normally from a function that calls
+  __builtin_eh_return.  In this case, these registers are saved but
+  should not be restored, or the return value may be clobbered.  */
+
+   if (!(skip_eh_data_regs_p
+ && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4))
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
offset -= UNITS_PER_WORD;
}
  
@@ -1290,7 +1299,7 @@ loongarch_expand_prologue (void)

GEN_INT (-step1));
RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
size -= step1;
-  loongarch_for_each_saved_reg (size, loongarch_save_reg);
+  loongarch_for_each_saved_reg (size, loongarch_save_reg, false);
  }
  
/* Set up the frame pointer, if we're using one.  */

@@ -1375,11 +1384,13 @@ loongarch_can_use_return_insn (void)
return reload_completed && cfun->machine->frame.total_size == 0;
  }
  
-/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P

-   says which.  */
+/* Expand function epilogue using the following insn patterns:
+   "epilogue"  (style == NORMAL_RETURN)
+   "sibcall_epilogue" (style == SIBCALL_RETURN)
+   "eh_return" (style == EXCEPTION_RETURN) */
  
  void

-loongarch_expand_epilogue (bool sibcall_p)
+loongarch_expand_epilogue (int style)
  {
/* Split the frame into two.  STEP1 is the amount of stack we should
   deallocate before restoring the registers.  STEP2 is the amount we
@@ -1396,7 +1407,8 @@ loongarch_expand_epilogue (bool sibcall_p)
bool need_barrier_p
  = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;
  
-  if (!sibcall_p && loongarch_can_use_return_insn ())

+  /* Handle simple returns.  */
+  if (style == NORMAL_RETURN && loongarch_can_use_return_insn ())
  {
emit_jump_insn (gen_return ());
return;
@@ -1472,7 +1484,9 @@ loongarch_expand_epilog

Re: [pushed][PATCH][gcc-12] LoongArch: Fix eh_return epilogue for normal returns.

2024-04-29 Thread Lulu Cheng

Pushed to r12-10403.

在 2024/4/29 下午4:09, Lulu Cheng 写道:

From: Yang Yujie 

On LoongArch, the registers $r4 - $r7 (EH_RETURN_DATA_REGNO) will be saved
and restored in the function prologue and epilogue if the given function calls
__builtin_eh_return.  This causes the return value to be overwritten on normal
return paths and breaks a rare case of libgcc's _Unwind_RaiseException.

gcc/ChangeLog:

PR target/114848
* config/loongarch/loongarch.cc: Do not restore the saved eh_return
data registers ($r4-$r7) for a normal return of a function that calls
__builtin_eh_return elsewhere.
* config/loongarch/loongarch-protos.h: Same.
* config/loongarch/loongarch.md: Same.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/eh_return-normal-return.c: New test.

(cherry picked from commit 4b421728289e6f1caa0dfaa953a11698ab95d37d)
---
  gcc/config/loongarch/loongarch-protos.h   |  2 +-
  gcc/config/loongarch/loongarch.cc | 35 -
  gcc/config/loongarch/loongarch.md | 23 ++-
  .../loongarch/eh_return-normal-return.c   | 38 +++
  4 files changed, 85 insertions(+), 13 deletions(-)
  create mode 100644 
gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index 2144c2421ed..8af82ffaa20 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -47,7 +47,7 @@ enum loongarch_symbol_type {
  extern rtx loongarch_emit_move (rtx, rtx);
  extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int);
  extern void loongarch_expand_prologue (void);
-extern void loongarch_expand_epilogue (bool);
+extern void loongarch_expand_epilogue (int);
  extern bool loongarch_can_use_return_insn (void);
  
  extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *);
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 33b1919e7a3..a0e11f2fc66 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1008,7 +1008,8 @@ loongarch_save_restore_reg (machine_mode mode, int regno, 
HOST_WIDE_INT offset,
  
  static void

  loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
- loongarch_save_restore_fn fn)
+ loongarch_save_restore_fn fn,
+ bool skip_eh_data_regs_p)
  {
HOST_WIDE_INT offset;
  
@@ -1017,7 +1018,15 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,

for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
  if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
{
-   loongarch_save_restore_reg (word_mode, regno, offset, fn);
+   /* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO)
+  when returning normally from a function that calls
+  __builtin_eh_return.  In this case, these registers are saved but
+  should not be restored, or the return value may be clobbered.  */
+
+   if (!(skip_eh_data_regs_p
+ && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4))
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
offset -= UNITS_PER_WORD;
}
  
@@ -1289,7 +1298,7 @@ loongarch_expand_prologue (void)

GEN_INT (-step1));
RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
size -= step1;
-  loongarch_for_each_saved_reg (size, loongarch_save_reg);
+  loongarch_for_each_saved_reg (size, loongarch_save_reg, false);
  }
  
  
@@ -1336,11 +1345,13 @@ loongarch_can_use_return_insn (void)

return reload_completed && cfun->machine->frame.total_size == 0;
  }
  
-/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P

-   says which.  */
+/* Expand function epilogue using the following insn patterns:
+   "epilogue"  (style == NORMAL_RETURN)
+   "sibcall_epilogue" (style == SIBCALL_RETURN)
+   "eh_return" (style == EXCEPTION_RETURN) */
  
  void

-loongarch_expand_epilogue (bool sibcall_p)
+loongarch_expand_epilogue (int style)
  {
/* Split the frame into two.  STEP1 is the amount of stack we should
   deallocate before restoring the registers.  STEP2 is the amount we
@@ -1357,7 +1368,8 @@ loongarch_expand_epilogue (bool sibcall_p)
bool need_barrier_p
  = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;
  
-  if (!sibcall_p && loongarch_can_use_return_insn ())

+  /* Handle simple returns.  */
+  if (style == NORMAL_RETURN && loongarch_can_use_return_insn ())
  {
emit_jump_insn (gen_return ());
return;
@@ -1433,7 +1445,9 @@ loongarch_expand_epilogue (bool sibcall_p)
  
/* Restore the registers.  *

[PATCH][gcc-12] LoongArch: Fix eh_return epilogue for normal returns.

2024-04-29 Thread Lulu Cheng
From: Yang Yujie 

On LoongArch, the registers $r4 - $r7 (EH_RETURN_DATA_REGNO) will be saved
and restored in the function prologue and epilogue if the given function calls
__builtin_eh_return.  This causes the return value to be overwritten on normal
return paths and breaks a rare case of libgcc's _Unwind_RaiseException.

gcc/ChangeLog:

PR target/114848
* config/loongarch/loongarch.cc: Do not restore the saved eh_return
data registers ($r4-$r7) for a normal return of a function that calls
__builtin_eh_return elsewhere.
* config/loongarch/loongarch-protos.h: Same.
* config/loongarch/loongarch.md: Same.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/eh_return-normal-return.c: New test.

(cherry picked from commit 4b421728289e6f1caa0dfaa953a11698ab95d37d)
---
 gcc/config/loongarch/loongarch-protos.h   |  2 +-
 gcc/config/loongarch/loongarch.cc | 35 -
 gcc/config/loongarch/loongarch.md | 23 ++-
 .../loongarch/eh_return-normal-return.c   | 38 +++
 4 files changed, 85 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index 2144c2421ed..8af82ffaa20 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -47,7 +47,7 @@ enum loongarch_symbol_type {
 extern rtx loongarch_emit_move (rtx, rtx);
 extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int);
 extern void loongarch_expand_prologue (void);
-extern void loongarch_expand_epilogue (bool);
+extern void loongarch_expand_epilogue (int);
 extern bool loongarch_can_use_return_insn (void);
 
 extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *);
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 33b1919e7a3..a0e11f2fc66 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1008,7 +1008,8 @@ loongarch_save_restore_reg (machine_mode mode, int regno, 
HOST_WIDE_INT offset,
 
 static void
 loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
- loongarch_save_restore_fn fn)
+ loongarch_save_restore_fn fn,
+ bool skip_eh_data_regs_p)
 {
   HOST_WIDE_INT offset;
 
@@ -1017,7 +1018,15 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
   for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
   {
-   loongarch_save_restore_reg (word_mode, regno, offset, fn);
+   /* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO)
+  when returning normally from a function that calls
+  __builtin_eh_return.  In this case, these registers are saved but
+  should not be restored, or the return value may be clobbered.  */
+
+   if (!(skip_eh_data_regs_p
+ && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4))
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
offset -= UNITS_PER_WORD;
   }
 
@@ -1289,7 +1298,7 @@ loongarch_expand_prologue (void)
GEN_INT (-step1));
   RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
   size -= step1;
-  loongarch_for_each_saved_reg (size, loongarch_save_reg);
+  loongarch_for_each_saved_reg (size, loongarch_save_reg, false);
 }
 
 
@@ -1336,11 +1345,13 @@ loongarch_can_use_return_insn (void)
   return reload_completed && cfun->machine->frame.total_size == 0;
 }
 
-/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
-   says which.  */
+/* Expand function epilogue using the following insn patterns:
+   "epilogue"(style == NORMAL_RETURN)
+   "sibcall_epilogue" (style == SIBCALL_RETURN)
+   "eh_return"   (style == EXCEPTION_RETURN) */
 
 void
-loongarch_expand_epilogue (bool sibcall_p)
+loongarch_expand_epilogue (int style)
 {
   /* Split the frame into two.  STEP1 is the amount of stack we should
  deallocate before restoring the registers.  STEP2 is the amount we
@@ -1357,7 +1368,8 @@ loongarch_expand_epilogue (bool sibcall_p)
   bool need_barrier_p
 = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;
 
-  if (!sibcall_p && loongarch_can_use_return_insn ())
+  /* Handle simple returns.  */
+  if (style == NORMAL_RETURN && loongarch_can_use_return_insn ())
 {
   emit_jump_insn (gen_return ());
   return;
@@ -1433,7 +1445,9 @@ loongarch_expand_epilogue (bool sibcall_p)
 
   /* Restore the registers.  */
   loongarch_for_each_saved_reg (frame->total_size - step2,
-   loongarch_restore_reg);
+   loongarch_restore_reg,
+   crtl->calls_eh_return
+ 

[PATCH][gcc-13] LoongArch: Fix eh_return epilogue for normal returns.

2024-04-29 Thread Lulu Cheng
From: Yang Yujie 

On LoongArch, the registers $r4 - $r7 (EH_RETURN_DATA_REGNO) will be saved
and restored in the function prologue and epilogue if the given function calls
__builtin_eh_return.  This causes the return value to be overwritten on normal
return paths and breaks a rare case of libgcc's _Unwind_RaiseException.

gcc/ChangeLog:

PR target/114848
* config/loongarch/loongarch.cc: Do not restore the saved eh_return
data registers ($r4-$r7) for a normal return of a function that calls
__builtin_eh_return elsewhere.
* config/loongarch/loongarch-protos.h: Same.
* config/loongarch/loongarch.md: Same.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/eh_return-normal-return.c: New test.

(cherry picked from commit 4b421728289e6f1caa0dfaa953a11698ab95d37d)
---
 gcc/config/loongarch/loongarch-protos.h   |  2 +-
 gcc/config/loongarch/loongarch.cc | 35 -
 gcc/config/loongarch/loongarch.md | 23 ++-
 .../loongarch/eh_return-normal-return.c   | 38 +++
 4 files changed, 85 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index 35cc77c7367..0f608ee5179 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -60,7 +60,7 @@ enum loongarch_symbol_type {
 extern rtx loongarch_emit_move (rtx, rtx);
 extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int);
 extern void loongarch_expand_prologue (void);
-extern void loongarch_expand_epilogue (bool);
+extern void loongarch_expand_epilogue (int);
 extern bool loongarch_can_use_return_insn (void);
 
 extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *);
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index f47a5fc2ad7..2238858cd6a 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1012,7 +1012,8 @@ loongarch_save_restore_reg (machine_mode mode, int regno, 
HOST_WIDE_INT offset,
 
 static void
 loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
- loongarch_save_restore_fn fn)
+ loongarch_save_restore_fn fn,
+ bool skip_eh_data_regs_p)
 {
   HOST_WIDE_INT offset;
 
@@ -1021,7 +1022,15 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
   for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
   {
-   loongarch_save_restore_reg (word_mode, regno, offset, fn);
+   /* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO)
+  when returning normally from a function that calls
+  __builtin_eh_return.  In this case, these registers are saved but
+  should not be restored, or the return value may be clobbered.  */
+
+   if (!(skip_eh_data_regs_p
+ && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4))
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
offset -= UNITS_PER_WORD;
   }
 
@@ -1290,7 +1299,7 @@ loongarch_expand_prologue (void)
GEN_INT (-step1));
   RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
   size -= step1;
-  loongarch_for_each_saved_reg (size, loongarch_save_reg);
+  loongarch_for_each_saved_reg (size, loongarch_save_reg, false);
 }
 
   /* Set up the frame pointer, if we're using one.  */
@@ -1375,11 +1384,13 @@ loongarch_can_use_return_insn (void)
   return reload_completed && cfun->machine->frame.total_size == 0;
 }
 
-/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
-   says which.  */
+/* Expand function epilogue using the following insn patterns:
+   "epilogue"(style == NORMAL_RETURN)
+   "sibcall_epilogue" (style == SIBCALL_RETURN)
+   "eh_return"   (style == EXCEPTION_RETURN) */
 
 void
-loongarch_expand_epilogue (bool sibcall_p)
+loongarch_expand_epilogue (int style)
 {
   /* Split the frame into two.  STEP1 is the amount of stack we should
  deallocate before restoring the registers.  STEP2 is the amount we
@@ -1396,7 +1407,8 @@ loongarch_expand_epilogue (bool sibcall_p)
   bool need_barrier_p
 = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;
 
-  if (!sibcall_p && loongarch_can_use_return_insn ())
+  /* Handle simple returns.  */
+  if (style == NORMAL_RETURN && loongarch_can_use_return_insn ())
 {
   emit_jump_insn (gen_return ());
   return;
@@ -1472,7 +1484,9 @@ loongarch_expand_epilogue (bool sibcall_p)
 
   /* Restore the registers.  */
   loongarch_for_each_saved_reg (frame->total_size - step2,
-   loongarch_restore_reg);
+   loongarch_restore_reg,
+ 

Re: [PATCH] LoongArch: Add constraints for bit string operation define_insn_and_split's [PR114861]

2024-04-26 Thread Lulu Cheng

LGTM!

Thanks.

在 2024/4/26 下午9:52, Xi Ruoyao 写道:

Without the constraints, the compiler attempts to use a stack slot as the
target, causing an ICE building the kernel with -Os:

 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c:3144:1:
 error: could not split insn
 (insn:TI 1764 67 1745
   (set (mem/c:DI (reg/f:DI 3 $r3) [707 %sfp+-80 S8 A64])
(and:DI (reg/v:DI 28 $r28 [orig:422 raster_config ] [422])
(const_int -50331649 [0xfffffffffcffffff])))
   "drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c":1386:21 111
   {*bstrins_di_for_mask}
   (nil))

Add these constraints to fix the issue.

gcc/ChangeLog:

PR target/114861
* config/loongarch/loongarch.md (bstrins_<mode>_for_mask): Add
constraints for operands.
(bstrins_<mode>_for_ior_mask): Likewise.

gcc/testsuite/ChangeLog:

PR target/114861
* gcc.target/loongarch/pr114861.c: New test.
---
  gcc/config/loongarch/loongarch.md | 16 
  gcc/testsuite/gcc.target/loongarch/pr114861.c | 39 +++
  2 files changed, 47 insertions(+), 8 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/pr114861.c

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index a316c8fb820..5c80c169cbf 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1543,9 +1543,9 @@ (define_insn "and3_extended"
 (set_attr "mode" "")])
  
  (define_insn_and_split "*bstrins__for_mask"

-  [(set (match_operand:GPR 0 "register_operand")
-   (and:GPR (match_operand:GPR 1 "register_operand")
-(match_operand:GPR 2 "ins_zero_bitmask_operand")))]
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (and:GPR (match_operand:GPR 1 "register_operand" "r")
+(match_operand:GPR 2 "ins_zero_bitmask_operand" "i")))]
""
"#"
""
@@ -1563,11 +1563,11 @@ (define_insn_and_split "*bstrins__for_mask"
})
  
  (define_insn_and_split "*bstrins__for_ior_mask"

-  [(set (match_operand:GPR 0 "register_operand")
-   (ior:GPR (and:GPR (match_operand:GPR 1 "register_operand")
-  (match_operand:GPR 2 "const_int_operand"))
-(and:GPR (match_operand:GPR 3 "register_operand")
- (match_operand:GPR 4 "const_int_operand"))))]
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+   (ior:GPR (and:GPR (match_operand:GPR 1 "register_operand" "r")
+ (match_operand:GPR 2 "const_int_operand" "i"))
+(and:GPR (match_operand:GPR 3 "register_operand" "r")
+ (match_operand:GPR 4 "const_int_operand" "i"))))]
"loongarch_pre_reload_split ()
 && loongarch_use_bstrins_for_ior_with_mask (mode, operands)"
"#"
diff --git a/gcc/testsuite/gcc.target/loongarch/pr114861.c 
b/gcc/testsuite/gcc.target/loongarch/pr114861.c
new file mode 100644
index 000..e6507c406b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr114861.c
@@ -0,0 +1,39 @@
+/* PR114861: ICE building the kernel with -Os
+   Reduced from linux/fs/ntfs3/attrib.c at revision c942a0cd3603.  */
+/* { dg-do compile } */
+/* { dg-options "-Os -march=loongarch64 -msoft-float -mabi=lp64s" } */
+
+long evcn, attr_collapse_range_vbo, attr_collapse_range_bytes;
+unsigned short flags;
+int attr_collapse_range_ni_0_0;
+int *attr_collapse_range_mi;
+unsigned attr_collapse_range_svcn, attr_collapse_range_vcn1;
+void ni_insert_nonresident (unsigned, unsigned short, int **);
+int mi_pack_runs (int);
+int
+attr_collapse_range (void)
+{
+  _Bool __trans_tmp_1;
+  int run = attr_collapse_range_ni_0_0;
+  unsigned evcn1, vcn, end;
+  short a_flags = flags;
+  __trans_tmp_1 = flags & (32768 | 1);
+  if (__trans_tmp_1)
+return 2;
+  vcn = attr_collapse_range_vbo;
+  end = attr_collapse_range_bytes;
+  evcn1 = evcn;
+  for (;;)
+if (attr_collapse_range_svcn >= end)
+  {
+unsigned eat, next_svcn = mi_pack_runs (42);
+attr_collapse_range_vcn1 = (vcn ? vcn : attr_collapse_range_svcn);
+eat = (0 < end) - attr_collapse_range_vcn1;
+mi_pack_runs (run - eat);
+if (next_svcn + eat)
+  ni_insert_nonresident (evcn1 - eat - next_svcn, a_flags,
+ &attr_collapse_range_mi);
+  }
+else
+  return 42;
+}




Re: [pushed][PATCH] wwwdocs: gcc-14/changes.html: Add Loongarch changes.

2024-04-24 Thread Lulu Cheng



在 2024/4/23 上午11:43, Lulu Cheng 写道:

---
  htdocs/gcc-14/changes.html | 156 +
  1 file changed, 156 insertions(+)

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 9509487c..f0f0efe0 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -877,6 +877,162 @@ __asm (".global __flmap_lock"  "\n\t"

  
  
+LoongArch

+
+  Support for the following
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-march-7;>
+  -march parameters has been added:
+  
+la64v1.0
+la64v1.1
+la664
+  
+  It is now recommended to use -march=la64v1.0 as the only
+  compiler option to describe the target ISA when building binaries for
+  distribution. For more information on LoongArch ISA versions, see
+  https://github.com/loongson/la-toolchain-conventions/;>
+  Toolchain Conventions of the LoongArch™ Architecture.
+  
+  Support for the following
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mtune-8;>
+  -mtune parameters has been added:
+  
+generic
+la664
+  
+  
+  New ISA Extension
+
+  LSX (Loongson SIMD Extension): Support 128-bit vector instructions
+  and the intrinsics.
+  
+  LASX (Loongson Advanced SIMD Extension): Support 256-bit vector
+  instructions and the intrinsics.
+  
+  FRECIPE: Support frecipe.{s/d} and
+  frsqrte.{s/d} instructions and the intrinsics.
+  
+  DIV32: Support div.w[u] and mod.w[u]
+  instructions with inputs not sign-extended.
+  
+  LAM_BH: Support am{swap/add}[_db].{b/h} instructions.
+  
+  LAMCAS: Support amcas[_db].{b/h/w/d} instructions.
+  
+
+  
+  New Built-in Macros
+
+  __loongarch_arch: Target ISA preset as specified by
+  -march=. For example, compiling with
+  -march=la64v1.0, the value of 
__loongarch_arch
+  is "la64v1.0".
+  
+  __loongarch_tune: Processor model as specified by
+  -mtune or its default value.
+  
+  __loongarch_{simd,sx,asx}: These macros are not defined,
+  or defined as 1.
+  
+  __loongarch_simd_width: The maximum SIMD bit-width
+  enabled by the compiler. (128 for lsx, and 256 for lasx).
+  
+  __loongarch_frecipe: It's defined to 1 or undefined.
+  
+  __loongarch_div32: It's defined to 1 or undefined.
+  __loongarch_lam_bh: It's defined to 1 or undefined.
+  __loongarch_lamcas: It's defined to 1 or undefined.
+  __loongarch_ld_seq_sa: It's defined to 1 or undefined.
+  
+  __loongarch_version_major:
+  The minimally required LoongArch ISA version (major) to run the
+  compiled program, defined to 1 or undefined (iff no such version is
+  known to the compiler).
+  
+  __loongarch_version_minor:
+  The minimally required LoongArch ISA version (minor) to run the
+  compiled program, defined to 0 1 or undefined (iff
+  __loongarch_version_major is undefined).
+  
+  __FLOAT128_TYPE: It's defined to 1.
+
+  
+  New Intrinsics
+
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Base-Built-in-Functions.html;>
+  __builtin_thread_pointer
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-SX-Vector-Intrinsics.html;>
+  __lsx_*
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-ASX-Vector-Intrinsics.html;>
+  __lasx_*
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Base-Built-in-Functions.html;>
+  __frecipe_{s/d} and __frsqrte_{s/d}
+  
+
+  
+  New Compiler Option
+
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-ml_005ba_005dsx;>
+  -m[no-]lsx
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-ml_005ba_005dsx;>
+  -m[no-]lasx
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mfrecipe;>
+  -m[no-]frecipe
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mdiv32;>
+  -m[no-]div32
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mlam-bh;>
+  -m[no-]lam-bh
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mlamcas;>
+  -m[no-]lamcas
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mld-seq-sa;>
+  -m[no-]ld-seq-sa
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mrecip_003dopt;>
+  -mrecip=
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mrecip;>
+  -m[no-]recip
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mexplicit-relocs-1;>
+  -mexplicit-relocs={none,always,aut

[PATCH] wwwdocs: gcc-14/changes.html: Add Loongarch changes.

2024-04-22 Thread Lulu Cheng
---
 htdocs/gcc-14/changes.html | 156 +
 1 file changed, 156 insertions(+)

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 9509487c..f0f0efe0 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -877,6 +877,162 @@ __asm (".global __flmap_lock"  "\n\t"
   
 
 
+LoongArch
+
+  Support for the following
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-march-7;>
+  -march parameters has been added:
+  
+la64v1.0
+la64v1.1
+la664
+  
+  It is now recommended to use -march=la64v1.0 as the only
+  compiler option to describe the target ISA when building binaries for
+  distribution. For more information on LoongArch ISA versions, see
+  https://github.com/loongson/la-toolchain-conventions/;>
+  Toolchain Conventions of the LoongArch™ Architecture.
+  
+  Support for the following
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mtune-8;>
+  -mtune parameters has been added:
+  
+generic
+la664
+  
+  
+  New ISA Extension
+
+  LSX (Loongson SIMD Extension): Support 128-bit vector instructions
+  and the intrinsics.
+  
+  LASX (Loongson Advanced SIMD Extension): Support 256-bit vector
+  instructions and the intrinsics.
+  
+  FRECIPE: Support frecipe.{s/d} and
+  frsqrte.{s/d} instructions and the intrinsics.
+  
+  DIV32: Support div.w[u] and mod.w[u]
+  instructions with inputs not sign-extended.
+  
+  LAM_BH: Support am{swap/add}[_db].{b/h} instructions.
+  
+  LAMCAS: Support amcas[_db].{b/h/w/d} instructions.
+  
+
+  
+  New Built-in Macros
+
+  __loongarch_arch: Target ISA preset as specified by
+  -march=. For example, compiling with
+  -march=la64v1.0, the value of 
__loongarch_arch
+  is "la64v1.0".
+  
+  __loongarch_tune: Processor model as specified by
+  -mtune or its default value.
+  
+  __loongarch_{simd,sx,asx}: These macros are not defined,
+  or defined as 1.
+  
+  __loongarch_simd_width: The maximum SIMD bit-width
+  enabled by the compiler. (128 for lsx, and 256 for lasx).
+  
+  __loongarch_frecipe: It's defined to 1 or undefined.
+  
+  __loongarch_div32: It's defined to 1 or undefined.
+  __loongarch_lam_bh: It's defined to 1 or undefined.
+  __loongarch_lamcas: It's defined to 1 or undefined.
+  __loongarch_ld_seq_sa: It's defined to 1 or undefined.
+  
+  __loongarch_version_major:
+  The minimally required LoongArch ISA version (major) to run the
+  compiled program, defined to 1 or undefined (iff no such version is
+  known to the compiler).
+  
+  __loongarch_version_minor:
+  The minimally required LoongArch ISA version (minor) to run the
+  compiled program, defined to 0 1 or undefined (iff
+  __loongarch_version_major is undefined).
+  
+  __FLOAT128_TYPE: It's defined to 1.
+
+  
+  New Intrinsics
+
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Base-Built-in-Functions.html;>
+  __builtin_thread_pointer
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-SX-Vector-Intrinsics.html;>
+  __lsx_*
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-ASX-Vector-Intrinsics.html;>
+  __lasx_*
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Base-Built-in-Functions.html;>
+  __frecipe_{s/d} and __frsqrte_{s/d}
+  
+
+  
+  New Compiler Option
+
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-ml_005ba_005dsx;>
+  -m[no-]lsx
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-ml_005ba_005dsx;>
+  -m[no-]lasx
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mfrecipe;>
+  -m[no-]frecipe
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mdiv32;>
+  -m[no-]div32
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mlam-bh;>
+  -m[no-]lam-bh
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mlamcas;>
+  -m[no-]lamcas
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mld-seq-sa;>
+  -m[no-]ld-seq-sa
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mrecip_003dopt;>
+  -mrecip=
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mrecip;>
+  -m[no-]recip
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mexplicit-relocs-1;>
+  -mexplicit-relocs={none,always,auto}
+  
+  https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Options.html#index-mrelax-2;>
+  -m[no-]relax
+  
+ 

Re:[pushed] [PATCH v4 1/2] LoongArch: Define ISA versions

2024-04-22 Thread Lulu Cheng

Pushed to r14-10083.

在 2024/4/23 上午10:42, Yang Yujie 写道:

These ISA versions are defined as -march= parameters and
are recommended for building binaries for distribution.

Detailed description of these definitions can be found at
https://github.com/loongson/la-toolchain-conventions, which
the LoongArch GCC port aims to conform to.

gcc/ChangeLog:

* config.gcc: Make la64v1.0 the default ISA preset of the lp64d ABI.
* config/loongarch/genopts/loongarch-strings: Define la64v1.0, la64v1.1.
* config/loongarch/genopts/loongarch.opt.in: Likewise.
* config/loongarch/loongarch-c.cc (LARCH_CPP_SET_PROCESSOR): Likewise.
(loongarch_cpu_cpp_builtins): Likewise.
* config/loongarch/loongarch-cpu.cc (get_native_prid): Likewise.
(fill_native_cpu_config): Likewise.
* config/loongarch/loongarch-def.cc (array_tune): Likewise.
* config/loongarch/loongarch-def.h: Likewise.
* config/loongarch/loongarch-driver.cc (driver_set_m_parm): Likewise.
(driver_get_normalized_m_opts): Likewise.
* config/loongarch/loongarch-opts.cc (default_tune_for_arch): Likewise.
(TUNE_FOR_ARCH): Likewise.
(arch_str): Likewise.
(loongarch_target_option_override): Likewise.
* config/loongarch/loongarch-opts.h (TARGET_uARCH_LA464): Likewise.
(TARGET_uARCH_LA664): Likewise.
* config/loongarch/loongarch-str.h (STR_CPU_ABI_DEFAULT): Likewise.
(STR_ARCH_ABI_DEFAULT): Likewise.
(STR_TUNE_GENERIC): Likewise.
(STR_ARCH_LA64V1_0): Likewise.
(STR_ARCH_LA64V1_1): Likewise.
* config/loongarch/loongarch.cc 
(loongarch_cpu_sched_reassociation_width): Likewise.
(loongarch_asm_code_end): Likewise.
* config/loongarch/loongarch.opt: Likewise.
* doc/invoke.texi: Likewise.
---
  gcc/config.gcc| 34 
  .../loongarch/genopts/loongarch-strings   |  5 +-
  gcc/config/loongarch/genopts/loongarch.opt.in | 43 --
  gcc/config/loongarch/loongarch-c.cc   | 37 +++--
  gcc/config/loongarch/loongarch-cpu.cc | 35 
  gcc/config/loongarch/loongarch-def.cc | 83 +--
  gcc/config/loongarch/loongarch-def.h  | 37 ++---
  gcc/config/loongarch/loongarch-driver.cc  |  8 +-
  gcc/config/loongarch/loongarch-opts.cc| 66 +++
  gcc/config/loongarch/loongarch-opts.h |  4 +-
  gcc/config/loongarch/loongarch-str.h  |  5 +-
  gcc/config/loongarch/loongarch.cc | 11 +--
  gcc/config/loongarch/loongarch.opt| 43 --
  gcc/doc/invoke.texi   | 57 -
  14 files changed, 300 insertions(+), 168 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 5df3c52f8e9..929695c25ab 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -5072,7 +5072,7 @@ case "${target}" in
  
  		# Perform initial sanity checks on --with-* options.

case ${with_arch} in
-   "" | abi-default | loongarch64 | la[46]64) ;; # OK, append here.
+   "" | la64v1.[01] | abi-default | loongarch64 | la[46]64) ;; # 
OK, append here.
native)
if test x${host} != x${target}; then
echo "--with-arch=native is illegal for 
cross-compiler." 1>&2
@@ -5119,10 +5119,18 @@ case "${target}" in
  
  		# Infer ISA-related default options from the ABI: pass 1

case ${abi_base}/${abi_ext} in
-   lp64*/base)
+   lp64d/base)
# architectures that support lp64* ABI
-   arch_pattern="native|abi-default|loongarch64|la[46]64"
-   # default architecture for lp64* ABI
+   
arch_pattern="native|abi-default|la64v1.[01]|loongarch64|la[46]64"
+
+   # default architecture for lp64d ABI
+   arch_default="la64v1.0"
+   ;;
+   lp64[fs]/base)
+   # architectures that support lp64* ABI
+   
arch_pattern="native|abi-default|la64v1.[01]|loongarch64|la[46]64"
+
+   # default architecture for lp64[fs] ABI
arch_default="abi-default"
;;
*)
@@ -5194,15 +5202,7 @@ case "${target}" in
  
  
  		# Check default with_tune configuration using with_arch.

-   case ${with_arch} in
-   loongarch64)
-   tune_pattern="native|abi-default|loongarch64|la[46]64"
-   ;;
-   *)
-   # By default, $with_tune == $with_arch
-   tune_pattern="*"
-   ;;
-   esac
+   tune_pattern="native|generic|loongarch64|la[46]64"
  
  		case ${with_tune} in

"") ;; # OK
@@ -5252,7 +5252,7 @@ case 

Re: [pushed][PATCH v4 2/2] LoongArch: Define builtin macros for ISA evolutions

2024-04-22 Thread Lulu Cheng

Pushed to r14-10084.

在 2024/4/23 上午10:42, Yang Yujie 写道:

Detailed description of these definitions can be found at
https://github.com/loongson/la-toolchain-conventions, which
the LoongArch GCC port aims to conform to.

gcc/ChangeLog:

* config.gcc: Add loongarch-evolution.o.
* config/loongarch/genopts/genstr.sh: Enable generation of
loongarch-evolution.[cc,h].
* config/loongarch/t-loongarch: Likewise.
* config/loongarch/genopts/gen-evolution.awk: New file.
* config/loongarch/genopts/isa-evolution.in: Mark ISA version
of introduction for each ISA evolution feature.
* config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins):
Define builtin macros for enabled ISA evolutions and the ISA
version.
* config/loongarch/loongarch-cpu.cc: Use loongarch-evolution.h.
* config/loongarch/loongarch.h: Likewise.
* config/loongarch/loongarch-cpucfg-map.h: Delete.
* config/loongarch/loongarch-evolution.cc: New file.
* config/loongarch/loongarch-evolution.h: New file.
* config/loongarch/loongarch-opts.h (ISA_HAS_FRECIPE): Define.
(ISA_HAS_DIV32): Likewise.
(ISA_HAS_LAM_BH): Likewise.
(ISA_HAS_LAMCAS): Likewise.
(ISA_HAS_LD_SEQ_SA): Likewise.
---
  gcc/config.gcc|   2 +-
  .../loongarch/genopts/gen-evolution.awk   | 230 ++
  gcc/config/loongarch/genopts/genstr.sh|  82 ++-
  gcc/config/loongarch/genopts/isa-evolution.in |  10 +-
  gcc/config/loongarch/loongarch-c.cc   |  23 ++
  gcc/config/loongarch/loongarch-cpu.cc |   2 +-
  gcc/config/loongarch/loongarch-evolution.cc   |  60 +
  ...rch-cpucfg-map.h => loongarch-evolution.h} |  46 +++-
  gcc/config/loongarch/loongarch-opts.h |  11 -
  gcc/config/loongarch/loongarch.h  |   1 +
  gcc/config/loongarch/t-loongarch  |  26 +-
  11 files changed, 398 insertions(+), 95 deletions(-)
  create mode 100644 gcc/config/loongarch/genopts/gen-evolution.awk
  create mode 100644 gcc/config/loongarch/loongarch-evolution.cc
  rename gcc/config/loongarch/{loongarch-cpucfg-map.h => loongarch-evolution.h} 
(52%)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 929695c25ab..3364fc27524 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -485,7 +485,7 @@ loongarch*-*-*)
cpu_type=loongarch
d_target_objs="loongarch-d.o"
extra_headers="larchintrin.h lsxintrin.h lasxintrin.h"
-   extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o 
loongarch-opts.o loongarch-def.o"
+   extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o 
loongarch-opts.o loongarch-def.o loongarch-evolution.o"
extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o 
loongarch-def.o"
extra_options="${extra_options} g.opt fused-madd.opt"
;;
diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk 
b/gcc/config/loongarch/genopts/gen-evolution.awk
new file mode 100644
index 000..4d105afa906
--- /dev/null
+++ b/gcc/config/loongarch/genopts/gen-evolution.awk
@@ -0,0 +1,230 @@
+#!/usr/bin/gawk
+#
+# A simple script that generates loongarch-evolution.h
+# from genopts/isa-evolution.in
+#
+# Copyright (C) 2021-2024 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+BEGIN {
+# isa_version_major[]
+# isa_version_minor[]
+# cpucfg_word[]
+# cpucfg_bit_in_word[]
+# name_capitalized[]
+# comment[]
+}
+
+{
+cpucfg_word[NR] = $1
+cpucfg_bit_in_word[NR] = $2
+name[NR] = gensub(/-/, "_", "g", $3)
+name_capitalized[NR] = toupper(name[NR])
+isa_version_major[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\1", 1, $4)
+isa_version_minor[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\2", 1, $4)
+
+$1 = $2 = $3 = $4 = ""
+sub (/^\s*/, "")
+comment[NR] = $0
+}
+
+function copyright_header(from_year,to_year)
+{
+print "   Copyright (C) " from_year "-" to_year \
+  " Free Software Foundation, Inc."
+print ""
+print "This file is part of GCC."
+print ""
+print "GCC is free software; you can redistribute it and/or modify"
+print "it under the terms of the GNU General Public License as published 
by"
+print "the Free Software Foundation; either version 3, or (at 

Re: [PATCH 1/2] LoongArch: Define ISA versions

2024-04-19 Thread Lulu Cheng


在 2024/4/19 下午10:27, Xi Ruoyao 写道:

On Fri, 2024-04-19 at 19:04 +0800, Yang Yujie wrote:

  @table @samp
  @item native
-This selects the CPU to generate code for at compilation time by determining
-the processor type of the compiling machine.  Using @option{-march=native}
-enables all instruction subsets supported by the local machine (hence
-the result might not run on different machines).  Using @option{-mtune=native}
-produces code optimized for the local machine under the constraints
-of the selected instruction set.
+Local processor type detected by the native compiler.
  @item loongarch64
-A generic CPU with 64-bit extensions.
+Generic LoongArch 64-bit processor.
  @item la464
-LoongArch LA464 CPU with LBT, LSX, LASX, LVZ.
+LoongArch LA464-based processor with LSX, LASX.
+@item la664
+LoongArch LA664-based processor with LSX, LASX and all LoongArch v1.1 features.

One LoongArch v1.1 feature "Hardware Page Table Walker" is not
implemented by LA664.  Maybe "all LoongArch v1.1 **unprivileged**
features"?

The description of *-march* is "Generate instructions for the machine
type @var{arch-type}.", so perhaps there is no need to spell it out
like this here?



+@item la64v1.0
+LoongArch64 ISA version 1.0.
+@item la64v1.1
+LoongArch64 ISA version 1.1.

IMO it's better to use a wording like LA664, i.e. "a CPU implementing
all LoongArch v1.1 unprivileged features" (emphasising "all", as the
v1.1 manual allows to only implement a subset of v1.1 features).



[PATCH] gcc-13/changes.html (LoongArch): Fix link.

2024-04-19 Thread Lulu Cheng
---
 htdocs/gcc-13/changes.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index 4384c329..15a309d6 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -625,7 +625,7 @@ You may also want to check out our
   The new command-line option -mdirect-extern-access can 
be used
  to prevent accessing external symbols through GOT.
   
-  The new variable attribute https://gcc.gnu.org/onlinedocs/gcc/LoongArch-Variable-Attributes.html#LoongArch-Variable-Attributes;>model
+  The new variable attribute https://gcc.gnu.org/onlinedocs/gcc-13.1.0/gcc/LoongArch-Variable-Attributes.html#LoongArch-Variable-Attributes;>model
   has been added.
   
 
-- 
2.39.3



Re: [pushed][PATCH] LoongArch: Add indexes for some compilation options.

2024-04-15 Thread Lulu Cheng

Pushed to r14-9984.

在 2024/4/9 下午4:19, Lulu Cheng 写道:

gcc/ChangeLog:

* config/loongarch/loongarch.opt.urls: Regenerate.
* config/mn10300/mn10300.opt.urls: Likewise.
* config/msp430/msp430.opt.urls: Likewise.
* config/nds32/nds32-elf.opt.urls: Likewise.
* config/nds32/nds32-linux.opt.urls: Likewise.
* config/nds32/nds32.opt.urls: Likewise.
* config/pru/pru.opt.urls: Likewise.
* config/riscv/riscv.opt.urls: Likewise.
* config/rx/rx.opt.urls: Likewise.
* config/sh/sh.opt.urls: Likewise.
* config/sparc/sparc.opt.urls: Likewise.
* doc/invoke.texi: Add indexes for some compilation options.
---
  gcc/config/loongarch/loongarch.opt.urls | 9 +++--
  gcc/config/mn10300/mn10300.opt.urls | 2 +-
  gcc/config/msp430/msp430.opt.urls   | 2 +-
  gcc/config/nds32/nds32-elf.opt.urls | 2 +-
  gcc/config/nds32/nds32-linux.opt.urls   | 2 +-
  gcc/config/nds32/nds32.opt.urls | 2 +-
  gcc/config/pru/pru.opt.urls | 2 +-
  gcc/config/riscv/riscv.opt.urls | 2 +-
  gcc/config/rx/rx.opt.urls   | 2 +-
  gcc/config/sh/sh.opt.urls   | 2 +-
  gcc/config/sparc/sparc.opt.urls | 2 +-
  gcc/doc/invoke.texi | 7 ++-
  12 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.opt.urls 
b/gcc/config/loongarch/loongarch.opt.urls
index 88f0bb0f96f..9ed5d7b5596 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -57,12 +57,17 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mrecip)
  mrecip
  UrlSuffix(gcc/LoongArch-Options.html#index-mrecip)
  
-; skipping UrlSuffix for 'mcmodel=' due to finding no URLs

+mcmodel=
+UrlSuffix(gcc/LoongArch-Options.html#index-mcmodel)
  
  mdirect-extern-access

  UrlSuffix(gcc/LoongArch-Options.html#index-mdirect-extern-access)
  
-; skipping UrlSuffix for 'mrelax' due to finding no URLs

+mrelax
+UrlSuffix(gcc/LoongArch-Options.html#index-mrelax-2)
+
+mpass-mrelax-to-as
+UrlSuffix(gcc/LoongArch-Options.html#index-mpass-mrelax-to-as)
  
  mtls-dialect=

  UrlSuffix(gcc/LoongArch-Options.html#index-mtls-dialect-1)
diff --git a/gcc/config/mn10300/mn10300.opt.urls 
b/gcc/config/mn10300/mn10300.opt.urls
index 396ca4aa2e6..d0d1cce53a0 100644
--- a/gcc/config/mn10300/mn10300.opt.urls
+++ b/gcc/config/mn10300/mn10300.opt.urls
@@ -19,7 +19,7 @@ mno-crt0
  UrlSuffix(gcc/MN10300-Options.html#index-mno-crt0)
  
  mrelax

-UrlSuffix(gcc/MN10300-Options.html#index-mrelax-2)
+UrlSuffix(gcc/MN10300-Options.html#index-mrelax-3)
  
  mreturn-pointer-on-d0

  UrlSuffix(gcc/MN10300-Options.html#index-mreturn-pointer-on-d0)
diff --git a/gcc/config/msp430/msp430.opt.urls 
b/gcc/config/msp430/msp430.opt.urls
index 420c1c50f13..b8b8f9ce184 100644
--- a/gcc/config/msp430/msp430.opt.urls
+++ b/gcc/config/msp430/msp430.opt.urls
@@ -28,7 +28,7 @@ msmall
  UrlSuffix(gcc/MSP430-Options.html#index-msmall)
  
  mrelax

-UrlSuffix(gcc/MSP430-Options.html#index-mrelax-3)
+UrlSuffix(gcc/MSP430-Options.html#index-mrelax-4)
  
  minrt

  UrlSuffix(gcc/MSP430-Options.html#index-minrt)
diff --git a/gcc/config/nds32/nds32-elf.opt.urls 
b/gcc/config/nds32/nds32-elf.opt.urls
index 5399afba7d3..3ae1efe7312 100644
--- a/gcc/config/nds32/nds32-elf.opt.urls
+++ b/gcc/config/nds32/nds32-elf.opt.urls
@@ -1,5 +1,5 @@
  ; Autogenerated by regenerate-opt-urls.py from gcc/config/nds32/nds32-elf.opt 
and generated HTML
  
  mcmodel=

-UrlSuffix(gcc/NDS32-Options.html#index-mcmodel)
+UrlSuffix(gcc/NDS32-Options.html#index-mcmodel-1)
  
diff --git a/gcc/config/nds32/nds32-linux.opt.urls b/gcc/config/nds32/nds32-linux.opt.urls

index 27d39f04ad9..ac589ccd472 100644
--- a/gcc/config/nds32/nds32-linux.opt.urls
+++ b/gcc/config/nds32/nds32-linux.opt.urls
@@ -1,5 +1,5 @@
  ; Autogenerated by regenerate-opt-urls.py from 
gcc/config/nds32/nds32-linux.opt and generated HTML
  
  mcmodel=

-UrlSuffix(gcc/NDS32-Options.html#index-mcmodel)
+UrlSuffix(gcc/NDS32-Options.html#index-mcmodel-1)
  
diff --git a/gcc/config/nds32/nds32.opt.urls b/gcc/config/nds32/nds32.opt.urls

index e34512d69ba..44fa0696b95 100644
--- a/gcc/config/nds32/nds32.opt.urls
+++ b/gcc/config/nds32/nds32.opt.urls
@@ -51,7 +51,7 @@ mctor-dtor
  UrlSuffix(gcc/NDS32-Options.html#index-mctor-dtor)
  
  mrelax

-UrlSuffix(gcc/NDS32-Options.html#index-mrelax-4)
+UrlSuffix(gcc/NDS32-Options.html#index-mrelax-5)
  
  ; skipping UrlSuffix for 'munaligned-access' due to finding no URLs
  
diff --git a/gcc/config/pru/pru.opt.urls b/gcc/config/pru/pru.opt.urls

index 1f8a26a0db5..c87affb112b 100644
--- a/gcc/config/pru/pru.opt.urls
+++ b/gcc/config/pru/pru.opt.urls
@@ -7,7 +7,7 @@ mmcu=
  UrlSuffix(gcc/PRU-Options.html#index-mmcu-2)
  
  mno-relax

-UrlSuffix(gcc/PRU-Options.html#index-mno-relax)
+UrlSuffix(gcc/PRU-Options.html#index-mno-relax-1)
  
  mloop

  UrlSuffix(gcc/PRU-Options.html#index-mloop)
diff --git a/gcc/config/riscv

Re:[pushed] [PATCH v2] LoongArch: Enable switchable target

2024-04-09 Thread Lulu Cheng

Pushed to r14-9866.

在 2024/4/8 下午4:45, Yang Yujie 写道:

This patch fixes the back-end context switching in cases where functions
should be built with their own target contexts instead of the
global one, such as LTO linking and functions with target attributes (TBD).

PR target/113233

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_reg_init):
Reinitialize the loongarch_regno_mode_ok cache.
(loongarch_option_override): Same.
(loongarch_save_restore_target_globals): Restore target globals.
(loongarch_set_current_function): Restore the target contexts
for functions.
(TARGET_SET_CURRENT_FUNCTION): Define.
* config/loongarch/loongarch.h (SWITCHABLE_TARGET): Enable
switchable target context.
* config/loongarch/loongarch-builtins.cc (loongarch_init_builtins):
Initialize all builtin functions at startup.
(loongarch_expand_builtin): Turn assertion of builtin availability
into a test.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Define condition loongarch_sx_as.
* gcc.dg/lto/pr113233_0.c: New test.
---
  gcc/config/loongarch/loongarch-builtins.cc | 25 +++---
  gcc/config/loongarch/loongarch.cc  | 91 --
  gcc/config/loongarch/loongarch.h   |  2 +
  gcc/testsuite/gcc.dg/lto/pr113233_0.c  | 14 
  gcc/testsuite/lib/target-supports.exp  | 12 +++
  5 files changed, 127 insertions(+), 17 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/lto/pr113233_0.c

diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc
index efe7e5e5ebc..fbe46833c9b 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -2512,14 +2512,11 @@ loongarch_init_builtins (void)
for (i = 0; i < ARRAY_SIZE (loongarch_builtins); i++)
  {
d = &loongarch_builtins[i];
-  if (d->avail ())
-   {
- type = loongarch_build_function_type (d->function_type);
- loongarch_builtin_decls[i]
-   = add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL,
-   NULL);
- loongarch_get_builtin_decl_index[d->icode] = i;
-   }
+  type = loongarch_build_function_type (d->function_type);
+  loongarch_builtin_decls[i]
+   = add_builtin_function (d->name, type, i, BUILT_IN_MD, NULL,
+ NULL);
+  loongarch_get_builtin_decl_index[d->icode] = i;
  }
  }
  
@@ -3105,15 +3102,21 @@ loongarch_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,

  int ignore ATTRIBUTE_UNUSED)
  {
tree fndecl;
-  unsigned int fcode, avail;
+  unsigned int fcode;
const struct loongarch_builtin_description *d;
  
fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);

fcode = DECL_MD_FUNCTION_CODE (fndecl);
gcc_assert (fcode < ARRAY_SIZE (loongarch_builtins));
d = &loongarch_builtins[fcode];
-  avail = d->avail ();
-  gcc_assert (avail != 0);
+
+  if (!d->avail ())
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qD is not enabled", fndecl);
+  return target;
+}
+
switch (d->builtin_type)
  {
  case LARCH_BUILTIN_DIRECT:
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index c90b701a533..6b92e7034c5 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -7570,15 +7570,19 @@ loongarch_global_init (void)
loongarch_dwarf_regno[i] = INVALID_REGNUM;
  }
  
+  /* Function to allocate machine-dependent function status.  */

+  init_machine_status = &loongarch_init_machine_status;
+};
+
+static void
+loongarch_reg_init (void)
+{
/* Set up loongarch_hard_regno_mode_ok.  */
for (int mode = 0; mode < MAX_MACHINE_MODE; mode++)
  for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
loongarch_hard_regno_mode_ok_p[mode][regno]
= loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode);
-
-  /* Function to allocate machine-dependent function status.  */
-  init_machine_status = &loongarch_init_machine_status;
-};
+}
  
  static void

  loongarch_option_override_internal (struct loongarch_target *target,
@@ -7605,20 +7609,92 @@ loongarch_option_override_internal (struct 
loongarch_target *target,
  
/* Override some options according to the resolved target.  */

loongarch_target_option_override (target, opts, opts_set);
+
+  target_option_default_node = target_option_current_node
+= build_target_option_node (opts, opts_set);
+
+  loongarch_reg_init ();
+}
+
+/* Remember the last target of loongarch_set_current_function.  */
+
+static GTY(()) tree loongarch_previous_fndecl;
+
+/* Restore or save the TREE_TARGET_GLOBALS from or to new_tree.
+   Used by loongarch_set_current_function to
+   make sure optab availability predicates are recomputed when necessary.  */
+
+static void

[PATCH] LoongArch: Add indexes for some compilation options.

2024-04-09 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.opt.urls: Regenerate.
* config/mn10300/mn10300.opt.urls: Likewise.
* config/msp430/msp430.opt.urls: Likewise.
* config/nds32/nds32-elf.opt.urls: Likewise.
* config/nds32/nds32-linux.opt.urls: Likewise.
* config/nds32/nds32.opt.urls: Likewise.
* config/pru/pru.opt.urls: Likewise.
* config/riscv/riscv.opt.urls: Likewise.
* config/rx/rx.opt.urls: Likewise.
* config/sh/sh.opt.urls: Likewise.
* config/sparc/sparc.opt.urls: Likewise.
* doc/invoke.texi: Add indexes for some compilation options.
---
 gcc/config/loongarch/loongarch.opt.urls | 9 +++--
 gcc/config/mn10300/mn10300.opt.urls | 2 +-
 gcc/config/msp430/msp430.opt.urls   | 2 +-
 gcc/config/nds32/nds32-elf.opt.urls | 2 +-
 gcc/config/nds32/nds32-linux.opt.urls   | 2 +-
 gcc/config/nds32/nds32.opt.urls | 2 +-
 gcc/config/pru/pru.opt.urls | 2 +-
 gcc/config/riscv/riscv.opt.urls | 2 +-
 gcc/config/rx/rx.opt.urls   | 2 +-
 gcc/config/sh/sh.opt.urls   | 2 +-
 gcc/config/sparc/sparc.opt.urls | 2 +-
 gcc/doc/invoke.texi | 7 ++-
 12 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.opt.urls 
b/gcc/config/loongarch/loongarch.opt.urls
index 88f0bb0f96f..9ed5d7b5596 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -57,12 +57,17 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mrecip)
 mrecip
 UrlSuffix(gcc/LoongArch-Options.html#index-mrecip)
 
-; skipping UrlSuffix for 'mcmodel=' due to finding no URLs
+mcmodel=
+UrlSuffix(gcc/LoongArch-Options.html#index-mcmodel)
 
 mdirect-extern-access
 UrlSuffix(gcc/LoongArch-Options.html#index-mdirect-extern-access)
 
-; skipping UrlSuffix for 'mrelax' due to finding no URLs
+mrelax
+UrlSuffix(gcc/LoongArch-Options.html#index-mrelax-2)
+
+mpass-mrelax-to-as
+UrlSuffix(gcc/LoongArch-Options.html#index-mpass-mrelax-to-as)
 
 mtls-dialect=
 UrlSuffix(gcc/LoongArch-Options.html#index-mtls-dialect-1)
diff --git a/gcc/config/mn10300/mn10300.opt.urls 
b/gcc/config/mn10300/mn10300.opt.urls
index 396ca4aa2e6..d0d1cce53a0 100644
--- a/gcc/config/mn10300/mn10300.opt.urls
+++ b/gcc/config/mn10300/mn10300.opt.urls
@@ -19,7 +19,7 @@ mno-crt0
 UrlSuffix(gcc/MN10300-Options.html#index-mno-crt0)
 
 mrelax
-UrlSuffix(gcc/MN10300-Options.html#index-mrelax-2)
+UrlSuffix(gcc/MN10300-Options.html#index-mrelax-3)
 
 mreturn-pointer-on-d0
 UrlSuffix(gcc/MN10300-Options.html#index-mreturn-pointer-on-d0)
diff --git a/gcc/config/msp430/msp430.opt.urls 
b/gcc/config/msp430/msp430.opt.urls
index 420c1c50f13..b8b8f9ce184 100644
--- a/gcc/config/msp430/msp430.opt.urls
+++ b/gcc/config/msp430/msp430.opt.urls
@@ -28,7 +28,7 @@ msmall
 UrlSuffix(gcc/MSP430-Options.html#index-msmall)
 
 mrelax
-UrlSuffix(gcc/MSP430-Options.html#index-mrelax-3)
+UrlSuffix(gcc/MSP430-Options.html#index-mrelax-4)
 
 minrt
 UrlSuffix(gcc/MSP430-Options.html#index-minrt)
diff --git a/gcc/config/nds32/nds32-elf.opt.urls 
b/gcc/config/nds32/nds32-elf.opt.urls
index 5399afba7d3..3ae1efe7312 100644
--- a/gcc/config/nds32/nds32-elf.opt.urls
+++ b/gcc/config/nds32/nds32-elf.opt.urls
@@ -1,5 +1,5 @@
 ; Autogenerated by regenerate-opt-urls.py from gcc/config/nds32/nds32-elf.opt 
and generated HTML
 
 mcmodel=
-UrlSuffix(gcc/NDS32-Options.html#index-mcmodel)
+UrlSuffix(gcc/NDS32-Options.html#index-mcmodel-1)
 
diff --git a/gcc/config/nds32/nds32-linux.opt.urls 
b/gcc/config/nds32/nds32-linux.opt.urls
index 27d39f04ad9..ac589ccd472 100644
--- a/gcc/config/nds32/nds32-linux.opt.urls
+++ b/gcc/config/nds32/nds32-linux.opt.urls
@@ -1,5 +1,5 @@
 ; Autogenerated by regenerate-opt-urls.py from 
gcc/config/nds32/nds32-linux.opt and generated HTML
 
 mcmodel=
-UrlSuffix(gcc/NDS32-Options.html#index-mcmodel)
+UrlSuffix(gcc/NDS32-Options.html#index-mcmodel-1)
 
diff --git a/gcc/config/nds32/nds32.opt.urls b/gcc/config/nds32/nds32.opt.urls
index e34512d69ba..44fa0696b95 100644
--- a/gcc/config/nds32/nds32.opt.urls
+++ b/gcc/config/nds32/nds32.opt.urls
@@ -51,7 +51,7 @@ mctor-dtor
 UrlSuffix(gcc/NDS32-Options.html#index-mctor-dtor)
 
 mrelax
-UrlSuffix(gcc/NDS32-Options.html#index-mrelax-4)
+UrlSuffix(gcc/NDS32-Options.html#index-mrelax-5)
 
 ; skipping UrlSuffix for 'munaligned-access' due to finding no URLs
 
diff --git a/gcc/config/pru/pru.opt.urls b/gcc/config/pru/pru.opt.urls
index 1f8a26a0db5..c87affb112b 100644
--- a/gcc/config/pru/pru.opt.urls
+++ b/gcc/config/pru/pru.opt.urls
@@ -7,7 +7,7 @@ mmcu=
 UrlSuffix(gcc/PRU-Options.html#index-mmcu-2)
 
 mno-relax
-UrlSuffix(gcc/PRU-Options.html#index-mno-relax)
+UrlSuffix(gcc/PRU-Options.html#index-mno-relax-1)
 
 mloop
 UrlSuffix(gcc/PRU-Options.html#index-mloop)
diff --git a/gcc/config/riscv/riscv.opt.urls b/gcc/config/riscv/riscv.opt.urls
index da31820e234..6184121b2bf 100644
--- a/gcc/config/riscv/riscv.opt.urls
+++ 

Re:[pushed] [PATCH v1] LoongArch: Set default alignment for functions jumps and loops [PR112919].

2024-04-07 Thread Lulu Cheng



在 2024/4/6 下午5:53, Xi Ruoyao 写道:

On Tue, 2024-04-02 at 15:03 +0800, Lulu Cheng wrote:

+/* Alignment for functions loops and jumps for best performance.  For new
+   uarchs the value should be measured via benchmarking.  See the documentation
+   for -falign-functions -falign-loops and -falign-jumps in invoke.texi for the

^ ^

Better have two commas here.

Otherwise it should be OK.


+   format.  */

Modified the comment as suggested and pushed to r14-9824.



Re:[pushed] [PATCH] LoongArch: Remove unused code

2024-04-02 Thread Lulu Cheng

Pushed to r14-9766.

在 2024/4/2 下午2:33, Jiahao Xu 写道:

We do not support machines that satisfy ISA_HAS_LSX && !TARGET_64BIT now,
and will not support them in the future, so this patch removes the unused code.

gcc/ChangeLog:

* config/loongarch/lasx.md: Remove unused code.
* config/loongarch/loongarch-protos.h (loongarch_split_lsx_copy_d): 
Remove.
(loongarch_split_lsx_insert_d): Ditto.
(loongarch_split_lsx_fill_d): Ditto.
* config/loongarch/loongarch.cc (loongarch_split_lsx_copy_d): Ditto.
(loongarch_split_lsx_insert_d): Ditto.
(loongarch_split_lsx_fill_d): Ditto.
* config/loongarch/lsx.md (lsx_vpickve2gr_du): Remove splitter.
(lsx_vpickve2gr_): Ditto.
(abs2): Remove expander.
 (vabs2): Rename to abs2.

gcc/testsuite/ChangeLog:

 * gcc.target/loongarch/vector/lsx/lsx-abs.c: New test.
---
  gcc/config/loongarch/lasx.md  | 12 +--
  gcc/config/loongarch/loongarch-protos.h   |  3 -
  gcc/config/loongarch/loongarch.cc | 76 
  gcc/config/loongarch/lsx.md   | 89 ++-
  .../gcc.target/loongarch/vector/lsx/lsx-abs.c | 26 ++
  5 files changed, 35 insertions(+), 171 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-abs.c

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 2fa5e46c8e8..7bd61f8ed5b 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -572,12 +572,7 @@ (define_insn "lasx_xvinsgr2vr_"
  (match_operand 3 "const__operand" "")))]
"ISA_HAS_LASX"
  {
-#if 0
-  if (!TARGET_64BIT && (mode == V4DImode || mode == V4DFmode))
-return "#";
-  else
-#endif
-return "xvinsgr2vr.\t%u0,%z1,%y3";
+  return "xvinsgr2vr.\t%u0,%z1,%y3";
  }
[(set_attr "type" "simd_insert")
 (set_attr "mode" "")])
@@ -1446,10 +1441,7 @@ (define_insn "lasx_xvreplgr2vr_"
if (which_alternative == 1)
  return "xvldi.b\t%u0,0" ;
  
-  if (!TARGET_64BIT && (mode == V2DImode || mode == V2DFmode))

-return "#";
-  else
-return "xvreplgr2vr.\t%u0,%z1";
+  return "xvreplgr2vr.\t%u0,%z1";
  }
[(set_attr "type" "simd_fill")
 (set_attr "mode" "")
diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index e3ed2b912a5..e238d795a73 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -89,9 +89,6 @@ extern void loongarch_split_128bit_move (rtx, rtx);
  extern bool loongarch_split_128bit_move_p (rtx, rtx);
  extern void loongarch_split_256bit_move (rtx, rtx);
  extern bool loongarch_split_256bit_move_p (rtx, rtx);
-extern void loongarch_split_lsx_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx));
-extern void loongarch_split_lsx_insert_d (rtx, rtx, rtx, rtx);
-extern void loongarch_split_lsx_fill_d (rtx, rtx);
  extern const char *loongarch_output_move (rtx, rtx);
  #ifdef RTX_CODE
  extern void loongarch_expand_scc (rtx *);
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index a69a203fbe6..8438cc64b0d 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4756,82 +4756,6 @@ loongarch_split_256bit_move (rtx dest, rtx src)
  }
  }
  
-

-/* Split a COPY_S.D with operands DEST, SRC and INDEX.  GEN is a function
-   used to generate subregs.  */
-
-void
-loongarch_split_lsx_copy_d (rtx dest, rtx src, rtx index,
-   rtx (*gen_fn)(rtx, rtx, rtx))
-{
-  gcc_assert ((GET_MODE (src) == V2DImode && GET_MODE (dest) == DImode)
- || (GET_MODE (src) == V2DFmode && GET_MODE (dest) == DFmode));
-
-  /* Note that low is always from the lower index, and high is always
- from the higher index.  */
-  rtx low = loongarch_subword (dest, false);
-  rtx high = loongarch_subword (dest, true);
-  rtx new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
-
-  emit_insn (gen_fn (low, new_src, GEN_INT (INTVAL (index) * 2)));
-  emit_insn (gen_fn (high, new_src, GEN_INT (INTVAL (index) * 2 + 1)));
-}
-
-/* Split a INSERT.D with operand DEST, SRC1.INDEX and SRC2.  */
-
-void
-loongarch_split_lsx_insert_d (rtx dest, rtx src1, rtx index, rtx src2)
-{
-  int i;
-  gcc_assert (GET_MODE (dest) == GET_MODE (src1));
-  gcc_assert ((GET_MODE (dest) == V2DImode
-  && (GET_MODE (src2) == DImode || src2 == const0_rtx))
- || (GET_MODE (dest) == V2DFmode && GET_MODE (src2) == DFmode));
-
-  /* Note that low is always from the lower index, and high is always
- from the higher index.  */
-  rtx low = loongarch_subword (src2, false);
-  rtx high = loongarch_subword (src2, true);
-  rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
-  rtx new_src1 = simplify_gen_subreg (V4SImode, src1, GET_MODE (src1), 0);
-  i = exact_log2 (INTVAL (index));
-  gcc_assert (i != -1);
-
-  emit_insn (gen_lsx_vinsgr2vr_w (new_dest, low, new_src1,
-  

[PATCH v1] LoongArch: Set default alignment for functions jumps and loops [PR112919].

2024-04-02 Thread Lulu Cheng
Xi Ruoyao set the alignment rules under LA464 in commit r14-1839,
but the macro ASM_OUTPUT_ALIGN_WITH_NOP was removed in r14-4674,
which affected the alignment rules.

So I tested the performance of SPEC2006 again with different alignments
on LA464 and LA664, and modified the alignment rules based on the test
results.

gcc/ChangeLog:

PR target/112919
* config/loongarch/loongarch-def.cc (la664_align): Newly defined
function that sets alignment rules under the LA664 microarchitecture.
* config/loongarch/loongarch-opts.cc
(loongarch_target_option_override): If not optimizing for size, set
the default alignment to what the target wants.
* config/loongarch/loongarch-tune.h (struct loongarch_align): Add
new member variables jump and loop.
---
 gcc/config/loongarch/loongarch-def.cc  | 11 ---
 gcc/config/loongarch/loongarch-opts.cc | 19 +--
 gcc/config/loongarch/loongarch-tune.h  | 22 +++---
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/gcc/config/loongarch/loongarch-def.cc 
b/gcc/config/loongarch/loongarch-def.cc
index e8c129ce643..63a8f108f4e 100644
--- a/gcc/config/loongarch/loongarch-def.cc
+++ b/gcc/config/loongarch/loongarch-def.cc
@@ -81,14 +81,19 @@ array_tune loongarch_cpu_cache =
 
 static inline loongarch_align la464_align ()
 {
-  return loongarch_align ().function_ ("32").label_ ("16");
+  return loongarch_align ().function_ ("32").loop_ ("16").jump_ ("16");
+}
+
+static inline loongarch_align la664_align ()
+{
+  return loongarch_align ().function_ ("8").loop_ ("8").jump_ ("32");
 }
 
 array_tune loongarch_cpu_align =
   array_tune ()
-.set (CPU_LOONGARCH64, la464_align ())
+.set (CPU_LOONGARCH64, la664_align ())
 .set (CPU_LA464, la464_align ())
-.set (CPU_LA664, la464_align ());
+.set (CPU_LA664, la664_align ());
 
 /* Default RTX cost initializer.  */
 loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
diff --git a/gcc/config/loongarch/loongarch-opts.cc 
b/gcc/config/loongarch/loongarch-opts.cc
index 2a6fc41b247..7b21cc311a8 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -922,13 +922,20 @@ loongarch_target_option_override (struct loongarch_target 
*target,
 {
   loongarch_update_gcc_opt_status (target, opts, opts_set);
 
-  /* alignments */
-  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
-opts->x_str_align_functions
-  = loongarch_cpu_align[target->cpu_tune].function;
+  /* If not optimizing for size, set the default
+ alignment to what the target wants.  */
+  if (!opts->x_optimize_size)
+{
+  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+   opts->x_str_align_functions
+ = loongarch_cpu_align[target->cpu_tune].function;
+
+  if (opts->x_flag_align_loops && !opts->x_str_align_loops)
+   opts->x_str_align_loops = loongarch_cpu_align[target->cpu_tune].loop;
 
-  if (opts->x_flag_align_labels && !opts->x_str_align_labels)
-opts->x_str_align_labels = loongarch_cpu_align[target->cpu_tune].label;
+  if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
+   opts->x_str_align_jumps = loongarch_cpu_align[target->cpu_tune].jump;
+}
 
   /* Set up parameters to be used in prefetching algorithm.  */
   int simultaneous_prefetches
diff --git a/gcc/config/loongarch/loongarch-tune.h 
b/gcc/config/loongarch/loongarch-tune.h
index 72b75f6de3f..3974edf9a90 100644
--- a/gcc/config/loongarch/loongarch-tune.h
+++ b/gcc/config/loongarch/loongarch-tune.h
@@ -162,14 +162,16 @@ struct loongarch_cache {
   }
 };
 
-/* Alignment for functions and labels for best performance.  For new uarchs
-   the value should be measured via benchmarking.  See the documentation for
-   -falign-functions and -falign-labels in invoke.texi for the format.  */
+/* Alignment for functions loops and jumps for best performance.  For new
+   uarchs the value should be measured via benchmarking.  See the documentation
+   for -falign-functions -falign-loops and -falign-jumps in invoke.texi for the
+   format.  */
 struct loongarch_align {
   const char *function;/* default value for -falign-functions */
-  const char *label;   /* default value for -falign-labels */
+  const char *loop;/* default value for -falign-loops */
+  const char *jump;/* default value for -falign-jumps */
 
-  loongarch_align () : function (nullptr), label (nullptr) {}
+  loongarch_align () : function (nullptr), loop (nullptr), jump (nullptr) {}
 
   loongarch_align function_ (const char *_function)
   {
@@ -177,9 +179,15 @@ struct loongarch_align {
 return *this;
   }
 
-  loongarch_align label_ (const char *_label)
+  loongarch_align loop_ (const char *_loop)
   {
-label = _label;
+loop = _loop;
+return *this;
+  }
+
+  loongarch_align jump_ (const char *_jump)
+  {
+jump = _jump;
 return *this;
   }
 };
-- 
2.39.3



[PATCH] Regenerate loongarch.opt.urls.

2024-03-31 Thread Lulu Cheng
Fixes: d28ea8e5a704 ("LoongArch: Split loongarch_option_override_internal
  into smaller procedures")

gcc/ChangeLog:

* config/loongarch/loongarch.opt.urls: Regenerate.
---
 gcc/config/loongarch/loongarch.opt.urls | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.opt.urls 
b/gcc/config/loongarch/loongarch.opt.urls
index c78bbfea2da..8e16304b66a 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -51,10 +51,10 @@ 
UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1)
 mexplicit-relocs
 UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1)
 
-mrecip
+mrecip=
 UrlSuffix(gcc/LoongArch-Options.html#index-mrecip)
 
-mrecip=
+mrecip
 UrlSuffix(gcc/LoongArch-Options.html#index-mrecip)
 
 ; skipping UrlSuffix for 'mcmodel=' due to finding no URLs
@@ -64,3 +64,18 @@ 
UrlSuffix(gcc/LoongArch-Options.html#index-mdirect-extern-access)
 
 ; skipping UrlSuffix for 'mrelax' due to finding no URLs
 
+mfrecipe
+UrlSuffix(gcc/LoongArch-Options.html#index-mfrecipe)
+
+mdiv32
+UrlSuffix(gcc/LoongArch-Options.html#index-mdiv32)
+
+mlam-bh
+UrlSuffix(gcc/LoongArch-Options.html#index-mlam-bh)
+
+mlamcas
+UrlSuffix(gcc/LoongArch-Options.html#index-mlamcas)
+
+mld-seq-sa
+UrlSuffix(gcc/LoongArch-Options.html#index-mld-seq-sa)
+
-- 
2.39.3



[PATCH] LoongArch: Add descriptions of the compilation options.

2024-03-30 Thread Lulu Cheng
Add descriptions for the compilation options '-mfrecipe' '-mdiv32'
'-mlam-bh' '-mlamcas' and '-mld-seq-sa'.

gcc/ChangeLog:

* doc/invoke.texi: Add descriptions for the compilation
options.
---
 gcc/doc/invoke.texi | 45 +++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c584664e168..942103c23f5 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1058,8 +1058,9 @@ Objective-C and Objective-C++ Dialects}.
 -mmax-inline-memcpy-size=@var{n}
 -mexplicit-relocs=@var{style} -mexplicit-relocs -mno-explicit-relocs
 -mdirect-extern-access -mno-direct-extern-access
--mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as}
--mrecip  -mrecip=@var{opt}
+-mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as
+-mrecip  -mrecip=@var{opt} -mfrecipe -mno-frecipe -mdiv32 -mno-div32
+-mlam-bh -mno-lam-bh -mlamcas -mno-lamcas -mld-seq-sa -mno-ld-seq-sa}
 
 @emph{M32R/D Options}
 @gccoptlist{-m32r2  -m32rx  -m32r
@@ -27095,6 +27096,46 @@ Enable the approximation for vectorized reciprocal 
square root.
 So, for example, @option{-mrecip=all,!sqrt} enables
 all of the reciprocal approximations, except for scalar square root.
 
+@opindex mfrecipe
+@opindex mno-frecipe
+@item -mfrecipe
+@itemx -mno-frecipe
+Use (do not use) @code{frecipe.@{s/d@}} and @code{frsqrte.@{s/d@}}
+instructions.  When build with @option{-march=la664}, it is enabled by default.
+The default is @option{-mno-frecipe}.
+
+@opindex mdiv32
+@opindex mno-div32
+@item -mdiv32
+@itemx -mno-div32
+Use (do not use) @code{div.w[u]} and @code{mod.w[u]} instructions with input
+not sign-extended.  When build with @option{-march=la664}, it is enabled by
+default.  The default is @option{-mno-div32}.
+
+@opindex mlam-bh
+@opindex mno-lam-bh
+@item -mlam-bh
+@itemx -mno-lam-bh
+Use (do not use) @code{am@{swap/add@}[_db].@{b/h@}} instructions.  When build
+with @option{-march=la664}, it is enabled by default.  The default is
+@option{-mno-lam-bh}.
+
+@opindex mlamcas
+@opindex mno-lamcas
+@item -mlamcas
+@itemx -mno-lamcas
+Use (do not use) @code{amcas[_db].@{b/h/w/d@}} instructions.  When build with
+@option{-march=la664}, it is enabled by default.  The default is
+@option{-mno-lamcas}.
+
+@opindex mld-seq-sa
+@opindex mno-ld-seq-sa
+@item -mld-seq-sa
+@itemx -mno-ld-seq-sa
+Whether a load-load barrier (@code{dbar 0x700}) is needed.  When build with
+@option{-march=la664}, it is enabled by default.  The default is
+@option{-mno-ld-seq-sa}, the load-load barrier is needed.
+
 @item loongarch-vect-unroll-limit
 The vectorizer will use available tuning information to determine whether it
 would be beneficial to unroll the main vectorized loop and by how much.  This
-- 
2.39.3



[PATCH] LoongArch: gcc13: Implement option save/restore.

2024-03-16 Thread Lulu Cheng
LTO option streaming and target attributes both require per-function
target configuration, which is achieved via option save/restore.

We implement TARGET_OPTION_{SAVE,RESTORE} to switch the la_target
context in addition to other automatically maintained option states
(via the "Save" option property in the .opt files).

PR target/113233

gcc/ChangeLog:

* config/loongarch/genopts/loongarch.opt.in: Mark options with
the "Save" property.
* config/loongarch/loongarch-opts.cc
(loongarch_update_gcc_opt_status): Update the value of the
la_target to global_options.
* config/loongarch/loongarch-opts.h
(loongarch_update_gcc_opt_status): Add a function declaration.
* config/loongarch/loongarch.cc
(loongarch_option_override_internal): Call the function
loongarch_update_gcc_opt_status.
(loongarch_option_save): New functions.
(loongarch_option_restore): Likewise.
(TARGET_OPTION_SAVE): Define macro.
(TARGET_OPTION_RESTORE): Likewise.
* config/loongarch/loongarch.opt: Regenerate.
---
 gcc/config/loongarch/genopts/loongarch.opt.in | 24 ++---
 gcc/config/loongarch/loongarch-opts.cc| 22 
 gcc/config/loongarch/loongarch-opts.h |  6 
 gcc/config/loongarch/loongarch.cc | 34 +--
 gcc/config/loongarch/loongarch.opt| 24 ++---
 5 files changed, 84 insertions(+), 26 deletions(-)

diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 76acd35d39c..aea4f2a4f61 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -58,7 +58,7 @@ EnumValue
 Enum(isa_ext_fpu) String(@@STR_ISA_EXT_FPU64@@) Value(ISA_EXT_FPU64)
 
 m@@OPTSTR_ISA_EXT_FPU@@=
-Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) 
Init(M_OPTION_NOT_SEEN)
+Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) 
Init(M_OPTION_NOT_SEEN) Save
 -m@@OPTSTR_ISA_EXT_FPU@@=FPU   Generate code for the given FPU.
 
 m@@OPTSTR_ISA_EXT_FPU@@=@@STR_ISA_EXT_FPU0@@
@@ -92,11 +92,11 @@ EnumValue
 Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464)
 
 m@@OPTSTR_ARCH@@=
-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) 
Init(M_OPTION_NOT_SEEN)
+Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) 
Init(M_OPTION_NOT_SEEN) Save
 -m@@OPTSTR_ARCH@@=PROCESSORGenerate code for the given PROCESSOR ISA.
 
 m@@OPTSTR_TUNE@@=
-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) 
Init(M_OPTION_NOT_SEEN)
+Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) 
Init(M_OPTION_NOT_SEEN) Save
 -m@@OPTSTR_TUNE@@=PROCESSORGenerate optimized code for PROCESSOR.
 
 
@@ -127,31 +127,31 @@ int la_opt_abi_ext = M_OPTION_NOT_SEEN
 
 
 mbranch-cost=
-Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
+Target RejectNegative Joined UInteger Var(loongarch_branch_cost) Save
 -mbranch-cost=COST Set the cost of branches to roughly COST instructions.
 
 mcheck-zero-division
-Target Mask(CHECK_ZERO_DIV)
+Target Mask(CHECK_ZERO_DIV) Save
 Trap on integer divide by zero.
 
 mcond-move-int
-Target Var(TARGET_COND_MOVE_INT) Init(1)
+Target Var(TARGET_COND_MOVE_INT) Init(1) Save
 Conditional moves for integral are enabled.
 
 mcond-move-float
-Target Var(TARGET_COND_MOVE_FLOAT) Init(1)
+Target Var(TARGET_COND_MOVE_FLOAT) Init(1) Save
 Conditional moves for float are enabled.
 
 mmemcpy
-Target Mask(MEMCPY)
+Target Mask(MEMCPY) Save
 Prevent optimizing block moves, which is also the default behavior of -Os.
 
 mstrict-align
-Target Var(TARGET_STRICT_ALIGN) Init(0)
+Target Var(TARGET_STRICT_ALIGN) Init(0) Save
 Do not generate unaligned memory accesses.
 
 mmax-inline-memcpy-size=
-Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) 
Init(1024)
+Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) 
Init(1024) Save
 -mmax-inline-memcpy-size=SIZE  Set the max size of memcpy to inline, default 
is 1024.
 
 mexplicit-relocs
@@ -182,11 +182,11 @@ EnumValue
 Enum(cmodel) String(@@STR_CMODEL_EXTREME@@) Value(CMODEL_EXTREME)
 
 mcmodel=
-Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) 
Init(CMODEL_NORMAL)
+Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) 
Init(CMODEL_NORMAL) Save
 Specify the code model.
 
 mdirect-extern-access
-Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
+Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0) Save
 Avoid using the GOT to access external symbols.
 
 mrelax
diff --git a/gcc/config/loongarch/loongarch-opts.cc 
b/gcc/config/loongarch/loongarch-opts.cc
index a52e25236ea..e158de9a12f 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -594,3 +594,25 @@ multilib_enabled_abi_list ()
 
   return XOBFINISH (_obstack, const char *);
 }
+
+/* option status 

[PATCH] LoongArch: gcc12: Implement option save/restore.

2024-03-16 Thread Lulu Cheng
LTO option streaming and target attributes both require per-function
target configuration, which is achieved via option save/restore.

We implement TARGET_OPTION_{SAVE,RESTORE} to switch the la_target
context in addition to other automatically maintained option states
(via the "Save" option property in the .opt files).

PR target/113233

gcc/ChangeLog:

* config/loongarch/genopts/loongarch.opt.in: Mark options with
the "Save" property.
* config/loongarch/loongarch-opts.cc
(loongarch_update_gcc_opt_status): Update the value of the
la_target to global_options.
* config/loongarch/loongarch-opts.h
(loongarch_update_gcc_opt_status): Add a function declaration.
* config/loongarch/loongarch.cc
(loongarch_option_override_internal): Call the function
loongarch_update_gcc_opt_status.
(loongarch_option_save): New functions.
(loongarch_option_restore): Likewise.
(TARGET_OPTION_SAVE): Define macro.
(TARGET_OPTION_RESTORE): Likewise.
* config/loongarch/loongarch.opt: Regenerate.
---
 gcc/config/loongarch/genopts/loongarch.opt.in | 22 ++--
 gcc/config/loongarch/loongarch-opts.cc| 22 
 gcc/config/loongarch/loongarch-opts.h |  6 
 gcc/config/loongarch/loongarch.cc | 34 +--
 gcc/config/loongarch/loongarch.opt| 22 ++--
 5 files changed, 82 insertions(+), 24 deletions(-)

diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 420a3941b3b..a3107cb2294 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -58,7 +58,7 @@ EnumValue
 Enum(isa_ext_fpu) String(@@STR_ISA_EXT_FPU64@@) Value(ISA_EXT_FPU64)
 
 m@@OPTSTR_ISA_EXT_FPU@@=
-Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) 
Init(M_OPTION_NOT_SEEN)
+Target RejectNegative Joined ToLower Enum(isa_ext_fpu) Var(la_opt_fpu) 
Init(M_OPTION_NOT_SEEN) Save
 -m@@OPTSTR_ISA_EXT_FPU@@=FPU   Generate code for the given FPU.
 
 m@@OPTSTR_ISA_EXT_FPU@@=@@STR_ISA_EXT_FPU0@@
@@ -92,11 +92,11 @@ EnumValue
 Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464)
 
 m@@OPTSTR_ARCH@@=
-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) 
Init(M_OPTION_NOT_SEEN)
+Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) 
Init(M_OPTION_NOT_SEEN) Save
 -m@@OPTSTR_ARCH@@=PROCESSORGenerate code for the given PROCESSOR ISA.
 
 m@@OPTSTR_TUNE@@=
-Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) 
Init(M_OPTION_NOT_SEEN)
+Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_tune) 
Init(M_OPTION_NOT_SEEN) Save
 -m@@OPTSTR_TUNE@@=PROCESSORGenerate optimized code for PROCESSOR.
 
 
@@ -127,31 +127,31 @@ int la_opt_abi_ext = M_OPTION_NOT_SEEN
 
 
 mbranch-cost=
-Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
+Target RejectNegative Joined UInteger Var(loongarch_branch_cost) Save
 -mbranch-cost=COST Set the cost of branches to roughly COST instructions.
 
 mcheck-zero-division
-Target Mask(CHECK_ZERO_DIV)
+Target Mask(CHECK_ZERO_DIV) Save
 Trap on integer divide by zero.
 
 mcond-move-int
-Target Var(TARGET_COND_MOVE_INT) Init(1)
+Target Var(TARGET_COND_MOVE_INT) Init(1) Save
 Conditional moves for integral are enabled.
 
 mcond-move-float
-Target Var(TARGET_COND_MOVE_FLOAT) Init(1)
+Target Var(TARGET_COND_MOVE_FLOAT) Init(1) Save
 Conditional moves for float are enabled.
 
 mmemcpy
-Target Mask(MEMCPY)
+Target Mask(MEMCPY) Save
 Prevent optimizing block moves, which is also the default behavior of -Os.
 
 mstrict-align
-Target Var(TARGET_STRICT_ALIGN) Init(0)
+Target Var(TARGET_STRICT_ALIGN) Init(0) Save
 Do not generate unaligned memory accesses.
 
 mmax-inline-memcpy-size=
-Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) 
Init(1024)
+Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) 
Init(1024) Save
 -mmax-inline-memcpy-size=SIZE  Set the max size of memcpy to inline, default 
is 1024.
 
 ; The code model option names for -mcmodel.
@@ -175,7 +175,7 @@ EnumValue
 Enum(cmodel) String(@@STR_CMODEL_EXTREME@@) Value(CMODEL_EXTREME)
 
 mcmodel=
-Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) 
Init(CMODEL_NORMAL)
+Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) 
Init(CMODEL_NORMAL) Save
 Specify the code model.
 
 mrelax
diff --git a/gcc/config/loongarch/loongarch-opts.cc 
b/gcc/config/loongarch/loongarch-opts.cc
index eb9c2a52f9e..b55baeccd2f 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -575,3 +575,25 @@ multilib_enabled_abi_list ()
 
   return XOBFINISH (_obstack, const char *);
 }
+
+/* option status feedback for "gcc --help=target -Q" */
+void
+loongarch_update_gcc_opt_status (struct loongarch_target *target,
+struct 

[PATCH] LoongArch: testsuite: Add compilation options to the regname-fp-s9.c.

2024-03-06 Thread Lulu Cheng
When the value of the macro DEFAULT_CFLAGS is set to '-ansi -pedantic-errors',
the regname-fp-s9.c test fails.  To solve this problem, add the compilation
options '-Wno-pedantic -std=gnu90' to this test case.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/regname-fp-s9.c: Add compilation option
'-Wno-pedantic -std=gnu90'.
---
 gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c 
b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
index d2e3b80f83c..77a74f1f667 100644
--- a/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
+++ b/gcc/testsuite/gcc.target/loongarch/regname-fp-s9.c
@@ -1,3 +1,4 @@
 /* { dg-do compile } */
+/* { dg-additional-options "-Wno-pedantic -std=gnu90" } */
 register long s9 asm("s9"); /* { dg-note "conflicts with 's9'" } */
 register long fp asm("fp"); /* { dg-warning "register of 'fp' used for 
multiple global register variables" } */
-- 
2.39.3



[PATCH v1] LoongArch: Fixed an issue with the implementation of the template atomic_compare_and_swapsi.

2024-03-06 Thread Lulu Cheng
If the hardware does not support LAMCAS, atomic_compare_and_swapsi needs to be
implemented through "ll.w+sc.w".  In the implementation of the instruction
sequence, it is necessary to determine whether the two registers are equal.
Since LoongArch's comparison instructions do not distinguish between 32-bit
and 64-bit, the two operand registers that are compared must both be
sign-extended.  One of the operand registers is loaded from memory by the
"ll.w" instruction, which guarantees that it is sign-extended.
However, the value of the other operand register is not guaranteed to be
sign-extended.

gcc/ChangeLog:

* config/loongarch/sync.md (atomic_cas_value_strong):
In loongarch64, a sign extension operation is added when
operands[2] is a register operand and the mode is SImode.

gcc/testsuite/ChangeLog:

* g++.target/loongarch/atomic-cas-int.C: New test.
---
 gcc/config/loongarch/sync.md  | 46 ++-
 .../g++.target/loongarch/atomic-cas-int.C | 32 +
 2 files changed, 67 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/loongarch/atomic-cas-int.C

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 8f35a5b48d2..d41c2d26811 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -245,18 +245,42 @@ (define_insn "atomic_cas_value_strong"
(clobber (match_scratch:GPR 5 "="))]
   ""
 {
-  return "1:\\n\\t"
-"ll.\\t%0,%1\\n\\t"
-"bne\\t%0,%z2,2f\\n\\t"
-"or%i3\\t%5,$zero,%3\\n\\t"
-"sc.\\t%5,%1\\n\\t"
-"beqz\\t%5,1b\\n\\t"
-"b\\t3f\\n\\t"
-"2:\\n\\t"
-"%G4\\n\\t"
-"3:\\n\\t";
+  output_asm_insn ("1:", operands);
+  output_asm_insn ("ll.\t%0,%1", operands);
+
+  /* Like the test case atomic-cas-int.C, in loongarch64, O1 and higher, the
+ return value of the val_without_const_folding will not be truncated and
+ will be passed directly to the function compare_exchange_strong.
+ However, the instruction 'bne' does not distinguish between 32-bit and
+ 64-bit operations.  so if the upper 32 bits of the register are not
+ extended by the 32nd bit symbol, then the comparison may not be valid
+ here.  This will affect the result of the operation.  */
+
+  if (TARGET_64BIT && REG_P (operands[2])
+  && GET_MODE (operands[2]) == SImode)
+{
+  output_asm_insn ("addi.w\t%5,%2,0", operands);
+  output_asm_insn ("bne\t%0,%5,2f", operands);
+}
+  else
+output_asm_insn ("bne\t%0,%z2,2f", operands);
+
+  output_asm_insn ("or%i3\t%5,$zero,%3", operands);
+  output_asm_insn ("sc.\t%5,%1", operands);
+  output_asm_insn ("beqz\t%5,1b", operands);
+  output_asm_insn ("b\t3f", operands);
+  output_asm_insn ("2:", operands);
+  output_asm_insn ("%G4", operands);
+  output_asm_insn ("3:", operands);
+
+  return "";
 }
-  [(set (attr "length") (const_int 28))])
+  [(set (attr "length")
+ (if_then_else
+   (and (match_test "GET_MODE (operands[2]) == SImode")
+(match_test "REG_P (operands[2])"))
+   (const_int 32)
+   (const_int 28)))])
 
 (define_insn "atomic_cas_value_strong_amcas"
   [(set (match_operand:QHWD 0 "register_operand" "=")
diff --git a/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C 
b/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C
new file mode 100644
index 000..830ce48267a
--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/atomic-cas-int.C
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include 
+#include 
+
+__attribute__ ((noinline)) long
+val_without_const_folding (long val)
+{
+  return val;
+}
+
+int
+main ()
+{
+  int oldval = 0xaa;
+  int newval = 0xbb;
+  std::atomic amo;
+
+  amo.store (oldval);
+
+  long longval = val_without_const_folding (0xff80 + oldval);
+  oldval = static_cast (longval);
+
+  amo.compare_exchange_strong (oldval, newval);
+
+  if (newval != amo.load (std::memory_order_relaxed))
+__builtin_abort ();
+
+  return 0;
+}
+
-- 
2.39.3



[PATCH v1] LoongArch: When checking whether the assembler supports conditional branch relaxation, add compilation parameter "--fatal-warnings" to the assembler.

2024-02-20 Thread Lulu Cheng
In binutils 2.40 and earlier versions, only a warning will be reported
when a relocation immediate value is out of bounds. As a result,
the value of the macro HAVE_AS_COND_BRANCH_RELAXATION will also be
defined as 1 when the assembler does not support conditional branch
relaxation. Therefore, add the compilation option "--fatal-warnings"
to avoid this problem.

gcc/ChangeLog:

* configure: Regenerate.
* configure.ac: Pass "--fatal-warnings" to the assembler
when checking whether the assembler supports conditional branch
relaxation.
---
 gcc/configure| 2 +-
 gcc/configure.ac | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/configure b/gcc/configure
index 41b978b0380..f1d434fede0 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -31136,7 +31136,7 @@ else
nop
.endr
beq $a0,$a1,a' > conftest.s
-if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
+if { ac_try='$gcc_cv_as $gcc_cv_as_flags --fatal-warnings -o conftest.o 
conftest.s >&5'
   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
   (eval $ac_try) 2>&5
   ac_status=$?
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 72012d61e67..9ebc578e4cc 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -5486,7 +5486,7 @@ x:
[Define if your assembler supports -mrelax option.])])
 gcc_GAS_CHECK_FEATURE([conditional branch relaxation support],
   gcc_cv_as_loongarch_cond_branch_relax,
-  [],
+  [--fatal-warnings],
   [a:
.rept 32769
nop
-- 
2.39.3



[PATCH v1 4/4] LoongArch: Define HAVE_AS_TLS to 0 if it's undefined [PR112299]

2024-02-20 Thread Lulu Cheng
From: Xi Ruoyao 

Now that loongarch.md uses HAVE_AS_TLS, we need this to fix the build
failure of a cross compiler when the cross assembler is not installed yet.

gcc/ChangeLog:

PR target/112299
* config/loongarch/loongarch-opts.h (HAVE_AS_TLS): Define to 0
if not defined yet.

(cherry picked from commit 6bf2cebe2bf49919c78814cb447d3aa6e3550d89)
---
 gcc/config/loongarch/loongarch-opts.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/config/loongarch/loongarch-opts.h 
b/gcc/config/loongarch/loongarch-opts.h
index bdf79ecc193..b4115dd7f85 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -95,4 +95,8 @@ loongarch_config_target (struct loongarch_target *target,
 #define HAVE_AS_COND_BRANCH_RELAXATION 0
 #endif
 
+#ifndef HAVE_AS_TLS
+#define HAVE_AS_TLS 0
+#endif
+
 #endif /* LOONGARCH_OPTS_H */
-- 
2.39.3



[PATCH v1 3/4] LoongArch: Disable relaxation if the assembler don't support conditional branch relaxation [PR112330]

2024-02-20 Thread Lulu Cheng
From: Xi Ruoyao 

As the commit message of r14-4674 has indicated, if the assembler does
not support conditional branch relaxation, a relocation overflow may
happen on conditional branches when relaxation is enabled because the
number of NOP instructions inserted by the assembler will be more than
the number estimated by GCC.

To work around this issue, disable relaxation by default if the
assembler is detected at GCC build time to be incapable of performing
conditional branch relaxation.  We also need to pass -mno-relax to the assembler to
really disable relaxation.  But, if the assembler does not support
-mrelax option at all, we should not pass -mno-relax to the assembler or
it will immediately error out.  Also handle this with the build time
assembler capability probing, and add a pair of options
-m[no-]pass-mrelax-to-as to allow using a different assembler from the
build-time one.

With this change, if GCC is built with GAS 2.41, relaxation will be
disabled by default.  So the default value of -mexplicit-relocs= is also
changed to 'always' if -mno-relax is specified or implied by the
build-time default, because using assembler macros for symbol addresses
produces no benefit when relaxation is disabled.

gcc/ChangeLog:

PR target/112330
* config/loongarch/genopts/loongarch.opt.in: Add
-m[no-]pass-mrelax-to-as.  Change the default of -m[no-]relax to
account for conditional branch relaxation support status.
* config/loongarch/loongarch.opt: Regenerate.
* configure.ac (gcc_cv_as_loongarch_cond_branch_relax): Check if
the assembler supports conditional branch relaxation.
* configure: Regenerate.
* config.in: Regenerate.  Note that there are some unrelated
changes introduced by r14-5424 (which does not contain a
config.in regeneration).
* config/loongarch/loongarch-opts.h
(HAVE_AS_COND_BRANCH_RELAXATION): Define to 0 if not defined.
* config/loongarch/loongarch-driver.h (ASM_MRELAX_DEFAULT):
Define.
(ASM_MRELAX_SPEC): Define.
(ASM_SPEC): Use ASM_MRELAX_SPEC instead of "%{mno-relax}".
* config/loongarch/loongarch.cc: Take the setting of
-m[no-]relax into account when determining the default of
-mexplicit-relocs=.
* doc/invoke.texi: Document -m[no-]relax and
-m[no-]pass-mrelax-to-as for LoongArch.  Update the default
value of -mexplicit-relocs=.

(cherry picked from commit fe23a2ff1f5072559552be0e41ab55bf72f5c79f)
---
 gcc/config.in |  6 
 gcc/config/loongarch/genopts/loongarch.opt.in |  6 +++-
 gcc/config/loongarch/loongarch-opts.h |  4 +++
 gcc/config/loongarch/loongarch.h  | 17 -
 gcc/config/loongarch/loongarch.opt|  6 +++-
 gcc/configure | 35 +++
 gcc/configure.ac  | 10 ++
 gcc/doc/invoke.texi   | 24 -
 8 files changed, 104 insertions(+), 4 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index f5b6287a96a..f3bdcb4cdda 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -367,6 +367,12 @@
 #endif
 
 
+/* Define if your assembler supports conditional branch relaxation. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_COND_BRANCH_RELAXATION
+#endif
+
+
 /* Define if your assembler supports the --debug-prefix-map option. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_DEBUG_PREFIX_MAP
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index edc2ed045d7..420a3941b3b 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -179,6 +179,10 @@ Target RejectNegative Joined Enum(cmodel) 
Var(la_opt_cmodel) Init(CMODEL_NORMAL)
 Specify the code model.
 
 mrelax
-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && 
HAVE_AS_COND_BRANCH_RELAXATION)
 Take advantage of linker relaxations to reduce the number of instructions
 required to materialize symbol addresses.
+
+mpass-mrelax-to-as
+Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION)
+Pass -mrelax or -mno-relax option to the assembler.
diff --git a/gcc/config/loongarch/loongarch-opts.h 
b/gcc/config/loongarch/loongarch-opts.h
index 60e682f57a0..bdf79ecc193 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -91,4 +91,8 @@ loongarch_config_target (struct loongarch_target *target,
 #define HAVE_AS_MRELAX_OPTION 0
 #endif
 
+#ifndef HAVE_AS_COND_BRANCH_RELAXATION
+#define HAVE_AS_COND_BRANCH_RELAXATION 0
+#endif
+
 #endif /* LOONGARCH_OPTS_H */
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index 8d08b84c8eb..28ab87eb660 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -69,8 +69,23 @@ along with GCC; see the 

[PATCH v1 2/4] LoongArch: Check whether binutils supports the relax function. If supported, explicit relocs are turned off by default.

2024-02-20 Thread Lulu Cheng
gcc/ChangeLog:

* config.in: Regenerate.
* config/loongarch/genopts/loongarch.opt.in: Add compilation option
mrelax. And set the initial value of explicit-relocs according to the
detection status.
* config/loongarch/gnu-user.h: When compiling with -mno-relax, pass the
--no-relax option to the linker.
* config/loongarch/loongarch-driver.h (ASM_SPEC): When compiling with
-mno-relax, pass the -mno-relax option to the assembler.
* config/loongarch/loongarch-opts.h (HAVE_AS_MRELAX_OPTION): Define 
macro.
* config/loongarch/loongarch.opt: Regenerate.
* configure: Regenerate.
* configure.ac: Add detection of support for binutils relax function.

(cherry picked from commit 9bab65a77049edcc7afc59532173206ee816e726)
---
 gcc/config.in | 12 +++
 gcc/config/loongarch/genopts/loongarch.opt.in |  5 +++
 gcc/config/loongarch/gnu-user.h   |  4 +--
 gcc/config/loongarch/loongarch-opts.h |  4 +++
 gcc/config/loongarch/loongarch.opt|  5 +++
 gcc/configure | 31 +++
 gcc/configure.ac  |  4 +++
 7 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index cc638759a40..f5b6287a96a 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -630,6 +630,12 @@
 #endif
 
 
+/* Define if your assembler supports -mrelax option. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_MRELAX_OPTION
+#endif
+
+
 /* Define if your assembler supports .mspabi_attribute. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_MSPABI_ATTRIBUTE
@@ -2214,6 +2220,12 @@
 #endif
 
 
+/* Define which stat syscall is able to handle 64bit indodes. */
+#ifndef USED_FOR_TARGET
+#undef HOST_STAT_FOR_64BIT_INODES
+#endif
+
+
 /* Define as const if the declaration of iconv() needs const. */
 #ifndef USED_FOR_TARGET
 #undef ICONV_CONST
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 61e7d72a0a1..edc2ed045d7 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -177,3 +177,8 @@ Enum(cmodel) String(@@STR_CMODEL_EXTREME@@) 
Value(CMODEL_EXTREME)
 mcmodel=
 Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) 
Init(CMODEL_NORMAL)
 Specify the code model.
+
+mrelax
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+Take advantage of linker relaxations to reduce the number of instructions
+required to materialize symbol addresses.
diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
index f050078da52..28ac8b0e1f6 100644
--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
@@ -46,8 +46,8 @@ along with GCC; see the file COPYING3.  If not see
 #define GNU_USER_TARGET_LINK_SPEC \
   "%{G*} %{shared} -m " GNU_USER_LINK_EMULATION \
   "%{!shared: %{static} %{!static: %{rdynamic:-export-dynamic} " \
-  "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}"
-
+  "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}" \
+  "%{mno-relax: --no-relax}"
 
 /* Similar to standard Linux, but adding -ffast-math support.  */
 #undef GNU_USER_TARGET_MATHFILE_SPEC
diff --git a/gcc/config/loongarch/loongarch-opts.h 
b/gcc/config/loongarch/loongarch-opts.h
index eaa6fc07448..60e682f57a0 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -87,4 +87,8 @@ loongarch_config_target (struct loongarch_target *target,
while -m[no]-memcpy imposes a global constraint.  */
 #define TARGET_DO_OPTIMIZE_BLOCK_MOVE_P  loongarch_do_optimize_block_move_p()
 
+#ifndef HAVE_AS_MRELAX_OPTION
+#define HAVE_AS_MRELAX_OPTION 0
+#endif
+
 #endif /* LOONGARCH_OPTS_H */
diff --git a/gcc/config/loongarch/loongarch.opt 
b/gcc/config/loongarch/loongarch.opt
index 3ff0d860413..78b5e0cc452 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -184,3 +184,8 @@ Enum(cmodel) String(extreme) Value(CMODEL_EXTREME)
 mcmodel=
 Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) 
Init(CMODEL_NORMAL)
 Specify the code model.
+
+mrelax
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+Take advantage of linker relaxations to reduce the number of instructions
+required to materialize symbol addresses.
diff --git a/gcc/configure b/gcc/configure
index b4907d258be..67cdd92a4f3 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -28871,6 +28871,37 @@ if test $gcc_cv_as_loongarch_dtprelword != yes; then
 $as_echo "#define HAVE_AS_DTPRELWORD 1" >>confdefs.h
 
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mrelax 
option" >&5
+$as_echo_n "checking assembler for -mrelax option... " >&6; }
+if ${gcc_cv_as_loongarch_relax+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_loongarch_relax=no
+  if test x$gcc_cv_as != x; then
+$as_echo '.text' > conftest.s
+if { 

[PATCH v1 1/4] LoongArch: Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP.

2024-02-20 Thread Lulu Cheng
There are two reasons for removing this macro definition:
1. The default in the assembler is to use the nop instruction for filling.
2. For assembly directives: .align [abs-expr[, abs-expr[, abs-expr]]]
   The third expression is the maximum number of bytes that should be
   skipped by this alignment directive.
   Therefore, it will affect the display of the specified alignment rules
   and affect the operating efficiency.

This modification relies on binutils commit 
1fb3cdd87ec61715a5684925fb6d6a6cf53bb97c.
(Since the assembler will add nop based on the .align information when doing 
relax,
it will cause the conditional branch to go out of bounds during the assembly 
process.
This submission of binutils solves this problem.)

gcc/ChangeLog:

* config/loongarch/loongarch.h (ASM_OUTPUT_ALIGN_WITH_NOP):
Delete.

Co-authored-by: Chenghua Xu 

(cherry picked from commit b20c7ee066cb7d952fa193972e8bc6362c6e4063)
---
 gcc/config/loongarch/loongarch.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index f34a7a604cc..8d08b84c8eb 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -978,11 +978,6 @@ typedef struct {
 
 #define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG))
 
-/* "nop" instruction 54525952 (andi $r0,$r0,0) is
-   used for padding.  */
-#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \
-  fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG))
-
 /* This is how to output an assembler line to advance the location
counter by SIZE bytes.  */
 
-- 
2.39.3



[PATCH v1 0/4] Fix a series of problems caused by ASM_OUTPUT_ALIGN_WITH_NOP (release/gcc-12).

2024-02-20 Thread Lulu Cheng
binutils 2.42 corrects the implementation of
".align [abs-expr[, abs-expr[, abs-expr]]]".
The macro ASM_OUTPUT_ALIGN_WITH_NOP in GCC uses this assembler directive,
so an error now occurs. See the link below for a detailed description.
https://gcc.gnu.org/pipermail/gcc-patches/2024-February/645067.html

In order to solve the above problems, do the following operations:

1. Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP. (cherry pick r14-4674)
2. Check whether binutils supports the relax function. (cherry pick r14-4160)
3. Disable relaxation if the assembler don't support
  conditional branch relaxation. (cherry pick r14-5434)

PR112299 is also fixed here.

Lulu Cheng (2):
  LoongArch: Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP.
  LoongArch: Check whether binutils supports the relax function. If
supported, explicit relocs are turned off by default.

Xi Ruoyao (2):
  LoongArch: Disable relaxation if the assembler don't support
conditional branch relaxation [PR112330]
  LoongArch: Define HAVE_AS_TLS to 0 if it's undefined [PR112299]

 gcc/config.in | 18 +
 gcc/config/loongarch/genopts/loongarch.opt.in |  9 +++
 gcc/config/loongarch/gnu-user.h   |  4 +-
 gcc/config/loongarch/loongarch-opts.h | 12 
 gcc/config/loongarch/loongarch.h  | 22 +--
 gcc/config/loongarch/loongarch.opt|  9 +++
 gcc/configure | 66 +++
 gcc/configure.ac  | 14 
 gcc/doc/invoke.texi   | 24 ++-
 9 files changed, 169 insertions(+), 9 deletions(-)

-- 
2.39.3



[PATCH v1 3/4] LoongArch: Disable relaxation if the assembler don't support conditional branch relaxation [PR112330]

2024-02-20 Thread Lulu Cheng
From: Xi Ruoyao 

As the commit message of r14-4674 has indicated, if the assembler does
not support conditional branch relaxation, a relocation overflow may
happen on conditional branches when relaxation is enabled because the
number of NOP instructions inserted by the assembler will be more than
the number estimated by GCC.

To work around this issue, disable relaxation by default if the
assembler is detected to be incapable of conditional branch relaxation
at GCC build time.  We also need to pass -mno-relax to the assembler to
really disable relaxation.  But, if the assembler does not support
-mrelax option at all, we should not pass -mno-relax to the assembler or
it will immediately error out.  Also handle this with the build time
assembler capability probing, and add a pair of options
-m[no-]pass-mrelax-to-as to allow using a different assembler from the
build-time one.

With this change, if GCC is built with GAS 2.41, relaxation will be
disabled by default.  So the default value of -mexplicit-relocs= is also
changed to 'always' if -mno-relax is specified or implied by the
build-time default, because using assembler macros for symbol addresses
produces no benefit when relaxation is disabled.

gcc/ChangeLog:

PR target/112330
* config/loongarch/genopts/loongarch.opt.in: Add
-m[no-]pass-mrelax-to-as.  Change the default of -m[no-]relax to
account for conditional branch relaxation support status.
* config/loongarch/loongarch.opt: Regenerate.
* configure.ac (gcc_cv_as_loongarch_cond_branch_relax): Check if
the assembler supports conditional branch relaxation.
* configure: Regenerate.
* config.in: Regenerate.  Note that there are some unrelated
changes introduced by r14-5424 (which does not contain a
config.in regeneration).
* config/loongarch/loongarch-opts.h
(HAVE_AS_COND_BRANCH_RELAXATION): Define to 0 if not defined.
* config/loongarch/loongarch-driver.h (ASM_MRELAX_DEFAULT):
Define.
(ASM_MRELAX_SPEC): Define.
(ASM_SPEC): Use ASM_MRELAX_SPEC instead of "%{mno-relax}".
* config/loongarch/loongarch.cc: Take the setting of
-m[no-]relax into account when determining the default of
-mexplicit-relocs=.
* doc/invoke.texi: Document -m[no-]relax and
-m[no-]pass-mrelax-to-as for LoongArch.  Update the default
value of -mexplicit-relocs=.

(cherry picked from commit fe23a2ff1f5072559552be0e41ab55bf72f5c79f)
---
 gcc/config.in |  6 
 gcc/config/loongarch/genopts/loongarch.opt.in |  6 +++-
 gcc/config/loongarch/loongarch-opts.h |  4 +++
 gcc/config/loongarch/loongarch.h  | 17 -
 gcc/config/loongarch/loongarch.opt|  6 +++-
 gcc/configure | 35 +++
 gcc/configure.ac  | 10 ++
 gcc/doc/invoke.texi   | 24 -
 8 files changed, 104 insertions(+), 4 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index f5b6287a96a..f3bdcb4cdda 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -367,6 +367,12 @@
 #endif
 
 
+/* Define if your assembler supports conditional branch relaxation. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_COND_BRANCH_RELAXATION
+#endif
+
+
 /* Define if your assembler supports the --debug-prefix-map option. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_DEBUG_PREFIX_MAP
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index edc2ed045d7..420a3941b3b 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -179,6 +179,10 @@ Target RejectNegative Joined Enum(cmodel) 
Var(la_opt_cmodel) Init(CMODEL_NORMAL)
 Specify the code model.
 
 mrelax
-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && 
HAVE_AS_COND_BRANCH_RELAXATION)
 Take advantage of linker relaxations to reduce the number of instructions
 required to materialize symbol addresses.
+
+mpass-mrelax-to-as
+Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION)
+Pass -mrelax or -mno-relax option to the assembler.
diff --git a/gcc/config/loongarch/loongarch-opts.h 
b/gcc/config/loongarch/loongarch-opts.h
index 60e682f57a0..bdf79ecc193 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -91,4 +91,8 @@ loongarch_config_target (struct loongarch_target *target,
 #define HAVE_AS_MRELAX_OPTION 0
 #endif
 
+#ifndef HAVE_AS_COND_BRANCH_RELAXATION
+#define HAVE_AS_COND_BRANCH_RELAXATION 0
+#endif
+
 #endif /* LOONGARCH_OPTS_H */
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index 8d08b84c8eb..28ab87eb660 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -69,8 +69,23 @@ along with GCC; see the 

[PATCH v1 2/4] LoongArch: Check whether binutils supports the relax function. If supported, explicit relocs are turned off by default.

2024-02-20 Thread Lulu Cheng
gcc/ChangeLog:

* config.in: Regenerate.
* config/loongarch/genopts/loongarch.opt.in: Add compilation option
mrelax. And set the initial value of explicit-relocs according to the
detection status.
* config/loongarch/gnu-user.h: When compiling with -mno-relax, pass the
--no-relax option to the linker.
* config/loongarch/loongarch-driver.h (ASM_SPEC): When compiling with
-mno-relax, pass the -mno-relax option to the assembler.
* config/loongarch/loongarch-opts.h (HAVE_AS_MRELAX_OPTION): Define 
macro.
* config/loongarch/loongarch.opt: Regenerate.
* configure: Regenerate.
* configure.ac: Add detection of support for binutils relax function.

(cherry picked from commit 9bab65a77049edcc7afc59532173206ee816e726)
---
 gcc/config.in | 12 +++
 gcc/config/loongarch/genopts/loongarch.opt.in |  5 +++
 gcc/config/loongarch/gnu-user.h   |  4 +--
 gcc/config/loongarch/loongarch-opts.h |  4 +++
 gcc/config/loongarch/loongarch.opt|  5 +++
 gcc/configure | 31 +++
 gcc/configure.ac  |  4 +++
 7 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index cc638759a40..f5b6287a96a 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -630,6 +630,12 @@
 #endif
 
 
+/* Define if your assembler supports -mrelax option. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_MRELAX_OPTION
+#endif
+
+
 /* Define if your assembler supports .mspabi_attribute. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_MSPABI_ATTRIBUTE
@@ -2214,6 +2220,12 @@
 #endif
 
 
+/* Define which stat syscall is able to handle 64bit indodes. */
+#ifndef USED_FOR_TARGET
+#undef HOST_STAT_FOR_64BIT_INODES
+#endif
+
+
 /* Define as const if the declaration of iconv() needs const. */
 #ifndef USED_FOR_TARGET
 #undef ICONV_CONST
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 61e7d72a0a1..edc2ed045d7 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -177,3 +177,8 @@ Enum(cmodel) String(@@STR_CMODEL_EXTREME@@) 
Value(CMODEL_EXTREME)
 mcmodel=
 Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) 
Init(CMODEL_NORMAL)
 Specify the code model.
+
+mrelax
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+Take advantage of linker relaxations to reduce the number of instructions
+required to materialize symbol addresses.
diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
index f050078da52..28ac8b0e1f6 100644
--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
@@ -46,8 +46,8 @@ along with GCC; see the file COPYING3.  If not see
 #define GNU_USER_TARGET_LINK_SPEC \
   "%{G*} %{shared} -m " GNU_USER_LINK_EMULATION \
   "%{!shared: %{static} %{!static: %{rdynamic:-export-dynamic} " \
-  "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}"
-
+  "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}" \
+  "%{mno-relax: --no-relax}"
 
 /* Similar to standard Linux, but adding -ffast-math support.  */
 #undef GNU_USER_TARGET_MATHFILE_SPEC
diff --git a/gcc/config/loongarch/loongarch-opts.h 
b/gcc/config/loongarch/loongarch-opts.h
index eaa6fc07448..60e682f57a0 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -87,4 +87,8 @@ loongarch_config_target (struct loongarch_target *target,
while -m[no]-memcpy imposes a global constraint.  */
 #define TARGET_DO_OPTIMIZE_BLOCK_MOVE_P  loongarch_do_optimize_block_move_p()
 
+#ifndef HAVE_AS_MRELAX_OPTION
+#define HAVE_AS_MRELAX_OPTION 0
+#endif
+
 #endif /* LOONGARCH_OPTS_H */
diff --git a/gcc/config/loongarch/loongarch.opt 
b/gcc/config/loongarch/loongarch.opt
index 3ff0d860413..78b5e0cc452 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -184,3 +184,8 @@ Enum(cmodel) String(extreme) Value(CMODEL_EXTREME)
 mcmodel=
 Target RejectNegative Joined Enum(cmodel) Var(la_opt_cmodel) 
Init(CMODEL_NORMAL)
 Specify the code model.
+
+mrelax
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+Take advantage of linker relaxations to reduce the number of instructions
+required to materialize symbol addresses.
diff --git a/gcc/configure b/gcc/configure
index b4907d258be..67cdd92a4f3 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -28871,6 +28871,37 @@ if test $gcc_cv_as_loongarch_dtprelword != yes; then
 $as_echo "#define HAVE_AS_DTPRELWORD 1" >>confdefs.h
 
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mrelax 
option" >&5
+$as_echo_n "checking assembler for -mrelax option... " >&6; }
+if ${gcc_cv_as_loongarch_relax+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_loongarch_relax=no
+  if test x$gcc_cv_as != x; then
+$as_echo '.text' > conftest.s
+if { 

[PATCH v1 4/4] LoongArch: Define HAVE_AS_TLS to 0 if it's undefined [PR112299]

2024-02-20 Thread Lulu Cheng
From: Xi Ruoyao 

Now loongarch.md uses HAVE_AS_TLS, we need this to fix the failure
building a cross compiler if the cross assembler is not installed yet.

gcc/ChangeLog:

PR target/112299
* config/loongarch/loongarch-opts.h (HAVE_AS_TLS): Define to 0
if not defined yet.

(cherry picked from commit 6bf2cebe2bf49919c78814cb447d3aa6e3550d89)
---
 gcc/config/loongarch/loongarch-opts.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/config/loongarch/loongarch-opts.h 
b/gcc/config/loongarch/loongarch-opts.h
index bdf79ecc193..b4115dd7f85 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -95,4 +95,8 @@ loongarch_config_target (struct loongarch_target *target,
 #define HAVE_AS_COND_BRANCH_RELAXATION 0
 #endif
 
+#ifndef HAVE_AS_TLS
+#define HAVE_AS_TLS 0
+#endif
+
 #endif /* LOONGARCH_OPTS_H */
-- 
2.39.3



[PATCH v1 1/4] LoongArch: Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP.

2024-02-20 Thread Lulu Cheng
There are two reasons for removing this macro definition:
1. The default in the assembler is to use the nop instruction for filling.
2. For assembly directives: .align [abs-expr[, abs-expr[, abs-expr]]]
   The third expression is the maximum number of bytes that should be
   skipped by this alignment directive.
   Therefore, it will affect the display of the specified alignment rules
   and affect the operating efficiency.

This modification relies on binutils commit 
1fb3cdd87ec61715a5684925fb6d6a6cf53bb97c.
(Since the assembler will add nop based on the .align information when doing 
relax,
it will cause the conditional branch to go out of bounds during the assembly 
process.
This submission of binutils solves this problem.)

gcc/ChangeLog:

* config/loongarch/loongarch.h (ASM_OUTPUT_ALIGN_WITH_NOP):
Delete.

Co-authored-by: Chenghua Xu 

(cherry picked from commit b20c7ee066cb7d952fa193972e8bc6362c6e4063)
---
 gcc/config/loongarch/loongarch.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index f34a7a604cc..8d08b84c8eb 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -978,11 +978,6 @@ typedef struct {
 
 #define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG))
 
-/* "nop" instruction 54525952 (andi $r0,$r0,0) is
-   used for padding.  */
-#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \
-  fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG))
-
 /* This is how to output an assembler line to advance the location
counter by SIZE bytes.  */
 
-- 
2.39.3



[PATCH v1 0/4] Fix a series of problems caused by ASM_OUTPUT_ALIGN_WITH_NOP.

2024-02-20 Thread Lulu Cheng
binutils 2.42 corrects the implementation of
".align [abs-expr[, abs-expr[, abs-expr]]]".
The macro ASM_OUTPUT_ALIGN_WITH_NOP in GCC uses this assembler directive,
so an error now occurs. See the link below for a detailed description.
https://gcc.gnu.org/pipermail/gcc-patches/2024-February/645067.html

In order to solve the above problems, do the following operations:

1. Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP. (cherry pick r14-4674)
2. Check whether binutils supports the relax function. (cherry pick r14-4160)
3. Disable relaxation if the assembler don't support
  conditional branch relaxation. (cherry pick r14-5434)

PR112299 is also fixed here.

Lulu Cheng (2):
  LoongArch: Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP.
  LoongArch: Check whether binutils supports the relax function. If
supported, explicit relocs are turned off by default.

Xi Ruoyao (2):
  LoongArch: Disable relaxation if the assembler don't support
conditional branch relaxation [PR112330]
  LoongArch: Define HAVE_AS_TLS to 0 if it's undefined [PR112299]

 gcc/config.in | 18 +
 gcc/config/loongarch/genopts/loongarch.opt.in |  9 +++
 gcc/config/loongarch/gnu-user.h   |  4 +-
 gcc/config/loongarch/loongarch-opts.h | 12 
 gcc/config/loongarch/loongarch.h  | 22 +--
 gcc/config/loongarch/loongarch.opt|  9 +++
 gcc/configure | 66 +++
 gcc/configure.ac  | 14 
 gcc/doc/invoke.texi   | 24 ++-
 9 files changed, 169 insertions(+), 9 deletions(-)

-- 
2.39.3



[PATCH v1 2/4] LoongArch: Check whether binutils supports the relax function. If supported, explicit relocs are turned off by default.

2024-02-20 Thread Lulu Cheng
gcc/ChangeLog:

* config.in: Regenerate.
* config/loongarch/genopts/loongarch.opt.in: Add compilation option
mrelax. And set the initial value of explicit-relocs according to the
detection status.
* config/loongarch/gnu-user.h: When compiling with -mno-relax, pass the
--no-relax option to the linker.
* config/loongarch/loongarch-driver.h (ASM_SPEC): When compiling with
-mno-relax, pass the -mno-relax option to the assembler.
* config/loongarch/loongarch-opts.h (HAVE_AS_MRELAX_OPTION): Define 
macro.
* config/loongarch/loongarch.opt: Regenerate.
* configure: Regenerate.
* configure.ac: Add detection of support for binutils relax function.

(cherry picked from commit 9bab65a77049edcc7afc59532173206ee816e726)
---
 gcc/config.in |  6 
 gcc/config/loongarch/genopts/loongarch.opt.in |  7 -
 gcc/config/loongarch/gnu-user.h   |  3 +-
 gcc/config/loongarch/loongarch-opts.h |  4 +++
 gcc/config/loongarch/loongarch.opt|  7 -
 gcc/configure | 31 +++
 gcc/configure.ac  |  4 +++
 7 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index ef35af16f2f..36a74dd5974 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -636,6 +636,12 @@
 #endif
 
 
+/* Define if your assembler supports -mrelax option. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_MRELAX_OPTION
+#endif
+
+
 /* Define if your assembler supports .mspabi_attribute. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_MSPABI_ATTRIBUTE
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 4b9b4ac273e..e7c32e61a50 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -155,7 +155,7 @@ Target Joined RejectNegative UInteger 
Var(loongarch_max_inline_memcpy_size) Init
 -mmax-inline-memcpy-size=SIZE  Set the max size of memcpy to inline, default 
is 1024.
 
 mexplicit-relocs
-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS)
+Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & 
!HAVE_AS_MRELAX_OPTION)
 Use %reloc() assembly operators.
 
 ; The code model option names for -mcmodel.
@@ -188,3 +188,8 @@ Specify the code model.
 mdirect-extern-access
 Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
 Avoid using the GOT to access external symbols.
+
+mrelax
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+Take advantage of linker relaxations to reduce the number of instructions
+required to materialize symbol addresses.
diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
index 5f1bd60ada3..e9f4bcef1d4 100644
--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
@@ -48,7 +48,8 @@ along with GCC; see the file COPYING3.  If not see
   "%{!shared: %{static} " \
   "%{!static: %{!static-pie: %{rdynamic:-export-dynamic} " \
   "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} " \
-  "%{static-pie: -static -pie --no-dynamic-linker -z text}}"
+  "%{static-pie: -static -pie --no-dynamic-linker -z text}}" \
+  "%{mno-relax: --no-relax}"
 
 
 /* Similar to standard Linux, but adding -ffast-math support.  */
diff --git a/gcc/config/loongarch/loongarch-opts.h 
b/gcc/config/loongarch/loongarch-opts.h
index b1ff54426e4..7ea02f4978c 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -92,4 +92,8 @@ loongarch_config_target (struct loongarch_target *target,
 #define HAVE_AS_EXPLICIT_RELOCS 0
 #endif
 
+#ifndef HAVE_AS_MRELAX_OPTION
+#define HAVE_AS_MRELAX_OPTION 0
+#endif
+
 #endif /* LOONGARCH_OPTS_H */
diff --git a/gcc/config/loongarch/loongarch.opt 
b/gcc/config/loongarch/loongarch.opt
index 68018ade73f..e37ed9015de 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -162,7 +162,7 @@ Target Joined RejectNegative UInteger 
Var(loongarch_max_inline_memcpy_size) Init
 -mmax-inline-memcpy-size=SIZE  Set the max size of memcpy to inline, default 
is 1024.
 
 mexplicit-relocs
-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS)
+Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & 
!HAVE_AS_MRELAX_OPTION)
 Use %reloc() assembly operators.
 
 ; The code model option names for -mcmodel.
@@ -195,3 +195,8 @@ Specify the code model.
 mdirect-extern-access
 Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
 Avoid using the GOT to access external symbols.
+
+mrelax
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+Take advantage of linker relaxations to reduce the number of instructions
+required to materialize symbol addresses.
diff --git a/gcc/configure b/gcc/configure
index dec2eca1a45..760bea9d4a0 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -29075,6 +29075,37 @@ if test 

[PATCH v1 3/4] LoongArch: Disable relaxation if the assembler don't support conditional branch relaxation [PR112330]

2024-02-20 Thread Lulu Cheng
From: Xi Ruoyao 

As the commit message of r14-4674 has indicated, if the assembler does
not support conditional branch relaxation, a relocation overflow may
happen on conditional branches when relaxation is enabled because the
number of NOP instructions inserted by the assembler will be more than
the number estimated by GCC.

To work around this issue, disable relaxation by default if the
assembler is detected to be incapable of conditional branch relaxation
at GCC build time.  We also need to pass -mno-relax to the assembler to
really disable relaxation.  But, if the assembler does not support
-mrelax option at all, we should not pass -mno-relax to the assembler or
it will immediately error out.  Also handle this with the build time
assembler capability probing, and add a pair of options
-m[no-]pass-mrelax-to-as to allow using a different assembler from the
build-time one.

With this change, if GCC is built with GAS 2.41, relaxation will be
disabled by default.  So the default value of -mexplicit-relocs= is also
changed to 'always' if -mno-relax is specified or implied by the
build-time default, because using assembler macros for symbol addresses
produces no benefit when relaxation is disabled.

gcc/ChangeLog:

PR target/112330
* config/loongarch/genopts/loongarch.opt.in: Add
-m[no-]pass-mrelax-to-as.  Change the default of -m[no-]relax to
account for conditional branch relaxation support status.
* config/loongarch/loongarch.opt: Regenerate.
* configure.ac (gcc_cv_as_loongarch_cond_branch_relax): Check if
the assembler supports conditional branch relaxation.
* configure: Regenerate.
* config.in: Regenerate.  Note that there are some unrelated
changes introduced by r14-5424 (which does not contain a
config.in regeneration).
* config/loongarch/loongarch-opts.h
(HAVE_AS_COND_BRANCH_RELAXATION): Define to 0 if not defined.
* config/loongarch/loongarch-driver.h (ASM_MRELAX_DEFAULT):
Define.
(ASM_MRELAX_SPEC): Define.
(ASM_SPEC): Use ASM_MRELAX_SPEC instead of "%{mno-relax}".
* config/loongarch/loongarch.cc: Take the setting of
-m[no-]relax into account when determining the default of
-mexplicit-relocs=.
* doc/invoke.texi: Document -m[no-]relax and
-m[no-]pass-mrelax-to-as for LoongArch.  Update the default
value of -mexplicit-relocs=.

(cherry picked from commit fe23a2ff1f5072559552be0e41ab55bf72f5c79f)
---
 gcc/config.in |  6 
 gcc/config/loongarch/genopts/loongarch.opt.in |  6 +++-
 gcc/config/loongarch/loongarch-opts.h |  4 +++
 gcc/config/loongarch/loongarch.h  | 17 -
 gcc/config/loongarch/loongarch.opt|  6 +++-
 gcc/configure | 35 +++
 gcc/configure.ac  | 10 ++
 gcc/doc/invoke.texi   | 24 -
 8 files changed, 104 insertions(+), 4 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index 36a74dd5974..83c98ae1457 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -373,6 +373,12 @@
 #endif
 
 
+/* Define if your assembler supports conditional branch relaxation. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_COND_BRANCH_RELAXATION
+#endif
+
+
 /* Define if your assembler supports the --debug-prefix-map option. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_DEBUG_PREFIX_MAP
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index e7c32e61a50..da6fedd153e 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -190,6 +190,10 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
 Avoid using the GOT to access external symbols.
 
 mrelax
-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && 
HAVE_AS_COND_BRANCH_RELAXATION)
 Take advantage of linker relaxations to reduce the number of instructions
 required to materialize symbol addresses.
+
+mpass-mrelax-to-as
+Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION)
+Pass -mrelax or -mno-relax option to the assembler.
diff --git a/gcc/config/loongarch/loongarch-opts.h 
b/gcc/config/loongarch/loongarch-opts.h
index 7ea02f4978c..edd41c82b17 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -96,4 +96,8 @@ loongarch_config_target (struct loongarch_target *target,
 #define HAVE_AS_MRELAX_OPTION 0
 #endif
 
+#ifndef HAVE_AS_COND_BRANCH_RELAXATION
+#define HAVE_AS_COND_BRANCH_RELAXATION 0
+#endif
+
 #endif /* LOONGARCH_OPTS_H */
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index cc719d0c796..d072522e3cf 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -69,8 +69,23 @@ along with GCC; see the file 

[PATCH v1 0/4] Fix a series of problems caused by ASM_OUTPUT_ALIGN_WITH_NOP.

2024-02-20 Thread Lulu Cheng
binutils 2.42 corrects the implementation of
".align [abs-expr[, abs-expr[, abs-expr]]]".
The macro ASM_OUTPUT_ALIGN_WITH_NOP in GCC uses this assembler directive,
so an error now occurs. See the link below for a detailed description.
https://gcc.gnu.org/pipermail/gcc-patches/2024-February/645067.html

In order to solve the above problems, do the following operations:

1. Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP. (cherry pick r14-4674)
2. Check whether binutils supports the relax function. (cherry pick r14-4160)
3. Disable relaxation if the assembler don't support
  conditional branch relaxation. (cherry pick r14-5434)

PR112299 is also fixed here.

Lulu Cheng (2):
  LoongArch: Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP.
  LoongArch: Check whether binutils supports the relax function. If
supported, explicit relocs are turned off by default.

Xi Ruoyao (2):
  LoongArch: Disable relaxation if the assembler don't support
conditional branch relaxation [PR112330]
  LoongArch: Define HAVE_AS_TLS to 0 if it's undefined [PR112299]

 gcc/config.in | 12 
 gcc/config/loongarch/genopts/loongarch.opt.in | 11 +++-
 gcc/config/loongarch/gnu-user.h   |  3 +-
 gcc/config/loongarch/loongarch-opts.h | 12 
 gcc/config/loongarch/loongarch.h  | 22 +--
 gcc/config/loongarch/loongarch.opt| 11 +++-
 gcc/configure | 66 +++
 gcc/configure.ac  | 14 
 gcc/doc/invoke.texi   | 24 ++-
 9 files changed, 165 insertions(+), 10 deletions(-)

-- 
2.39.3



[PATCH v1 1/4] LoongArch: Delete macro definition ASM_OUTPUT_ALIGN_WITH_NOP.

2024-02-20 Thread Lulu Cheng
There are two reasons for removing this macro definition:
1. The default in the assembler is to use the nop instruction for filling.
2. For assembly directives: .align [abs-expr[, abs-expr[, abs-expr]]]
   The third expression is the maximum number of bytes that should be
   skipped by this alignment directive.
   Therefore, it will affect the display of the specified alignment rules
   and affect the operating efficiency.

This modification relies on binutils commit 
1fb3cdd87ec61715a5684925fb6d6a6cf53bb97c.
(Since the assembler will add nop based on the .align information when doing 
relax,
it will cause the conditional branch to go out of bounds during the assembly 
process.
This submission of binutils solves this problem.)

gcc/ChangeLog:

* config/loongarch/loongarch.h (ASM_OUTPUT_ALIGN_WITH_NOP):
Delete.

Co-authored-by: Chenghua Xu 

(cherry picked from commit b20c7ee066cb7d952fa193972e8bc6362c6e4063)
---
 gcc/config/loongarch/loongarch.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index f0db67f8c7b..cc719d0c796 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -982,11 +982,6 @@ typedef struct {
 
 #define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG))
 
-/* "nop" instruction 54525952 (andi $r0,$r0,0) is
-   used for padding.  */
-#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \
-  fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG))
-
 /* This is how to output an assembler line to advance the location
counter by SIZE bytes.  */
 
-- 
2.39.3



[PATCH v1 4/4] LoongArch: Define HAVE_AS_TLS to 0 if it's undefined [PR112299]

2024-02-20 Thread Lulu Cheng
From: Xi Ruoyao 

Now loongarch.md uses HAVE_AS_TLS, we need this to fix the failure
building a cross compiler if the cross assembler is not installed yet.

gcc/ChangeLog:

PR target/112299
* config/loongarch/loongarch-opts.h (HAVE_AS_TLS): Define to 0
if not defined yet.

(cherry picked from commit 6bf2cebe2bf49919c78814cb447d3aa6e3550d89)
---
 gcc/config/loongarch/loongarch-opts.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/config/loongarch/loongarch-opts.h 
b/gcc/config/loongarch/loongarch-opts.h
index edd41c82b17..02184e2991a 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -100,4 +100,8 @@ loongarch_config_target (struct loongarch_target *target,
 #define HAVE_AS_COND_BRANCH_RELAXATION 0
 #endif
 
+#ifndef HAVE_AS_TLS
+#define HAVE_AS_TLS 0
+#endif
+
 #endif /* LOONGARCH_OPTS_H */
-- 
2.39.3



[PATCH 2/2] LoongArch: Remove redundant symbol type conversions in larchintrin.h.

2024-02-05 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/larchintrin.h (__movgr2fcsr): Remove redundant
symbol type conversions.
(__cacop_d): Likewise.
(__cpucfg): Likewise.
(__asrtle_d): Likewise.
(__asrtgt_d): Likewise.
(__lddir_d): Likewise.
(__ldpte_d): Likewise.
(__crc_w_b_w): Likewise.
(__crc_w_h_w): Likewise.
(__crc_w_w_w): Likewise.
(__crc_w_d_w): Likewise.
(__crcc_w_b_w): Likewise.
(__crcc_w_h_w): Likewise.
(__crcc_w_w_w): Likewise.
(__crcc_w_d_w): Likewise.
(__csrrd_w): Likewise.
(__csrwr_w): Likewise.
(__csrxchg_w): Likewise.
(__csrrd_d): Likewise.
(__csrwr_d): Likewise.
(__csrxchg_d): Likewise.
(__iocsrrd_b): Likewise.
(__iocsrrd_h): Likewise.
(__iocsrrd_w): Likewise.
(__iocsrrd_d): Likewise.
(__iocsrwr_b): Likewise.
(__iocsrwr_h): Likewise.
(__iocsrwr_w): Likewise.
(__iocsrwr_d): Likewise.
(__frecipe_s): Likewise.
(__frecipe_d): Likewise.
(__frsqrte_s): Likewise.
(__frsqrte_d): Likewise.
---
 gcc/config/loongarch/larchintrin.h | 69 ++
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/gcc/config/loongarch/larchintrin.h 
b/gcc/config/loongarch/larchintrin.h
index 04672e71728..0f55bdae838 100644
--- a/gcc/config/loongarch/larchintrin.h
+++ b/gcc/config/loongarch/larchintrin.h
@@ -87,13 +87,13 @@ __rdtimel_w (void)
 /* Assembly instruction format:fcsr, rj.  */
 /* Data types in instruction templates:  VOID, UQI, USI.  */
 #define __movgr2fcsr(/*ui5*/ _1, _2) \
-  __builtin_loongarch_movgr2fcsr ((_1), (unsigned int) _2);
+  __builtin_loongarch_movgr2fcsr ((_1), _2);
 
 #if defined __loongarch64
 /* Assembly instruction format:ui5, rj, si12.  */
 /* Data types in instruction templates:  VOID, USI, UDI, SI.  */
 #define __cacop_d(/*ui5*/ _1, /*unsigned long int*/ _2, /*si12*/ _3) \
-  ((void) __builtin_loongarch_cacop_d ((_1), (unsigned long int) (_2), (_3)))
+  __builtin_loongarch_cacop_d ((_1), (_2), (_3))
 #else
 #error "Unsupported ABI."
 #endif
@@ -104,7 +104,7 @@ extern __inline unsigned int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 __cpucfg (unsigned int _1)
 {
-  return (unsigned int) __builtin_loongarch_cpucfg ((unsigned int) _1);
+  return __builtin_loongarch_cpucfg (_1);
 }
 
 #ifdef __loongarch64
@@ -114,7 +114,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 __asrtle_d (long int _1, long int _2)
 {
-  __builtin_loongarch_asrtle_d ((long int) _1, (long int) _2);
+  __builtin_loongarch_asrtle_d (_1, _2);
 }
 
 /* Assembly instruction format:rj, rk.  */
@@ -123,7 +123,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 __asrtgt_d (long int _1, long int _2)
 {
-  __builtin_loongarch_asrtgt_d ((long int) _1, (long int) _2);
+  __builtin_loongarch_asrtgt_d (_1, _2);
 }
 #endif
 
@@ -131,7 +131,7 @@ __asrtgt_d (long int _1, long int _2)
 /* Assembly instruction format:rd, rj, ui5.  */
 /* Data types in instruction templates:  DI, DI, UQI.  */
 #define __lddir_d(/*long int*/ _1, /*ui5*/ _2) \
-  ((long int) __builtin_loongarch_lddir_d ((long int) (_1), (_2)))
+  __builtin_loongarch_lddir_d ((_1), (_2))
 #else
 #error "Unsupported ABI."
 #endif
@@ -140,7 +140,7 @@ __asrtgt_d (long int _1, long int _2)
 /* Assembly instruction format:rj, ui5.  */
 /* Data types in instruction templates:  VOID, DI, UQI.  */
 #define __ldpte_d(/*long int*/ _1, /*ui5*/ _2) \
-  ((void) __builtin_loongarch_ldpte_d ((long int) (_1), (_2)))
+  __builtin_loongarch_ldpte_d ((_1), (_2))
 #else
 #error "Unsupported ABI."
 #endif
@@ -151,7 +151,7 @@ extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 __crc_w_b_w (char _1, int _2)
 {
-  return (int) __builtin_loongarch_crc_w_b_w ((char) _1, (int) _2);
+  return __builtin_loongarch_crc_w_b_w (_1, _2);
 }
 
 /* Assembly instruction format:rd, rj, rk.  */
@@ -160,7 +160,7 @@ extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 __crc_w_h_w (short _1, int _2)
 {
-  return (int) __builtin_loongarch_crc_w_h_w ((short) _1, (int) _2);
+  return __builtin_loongarch_crc_w_h_w (_1, _2);
 }
 
 /* Assembly instruction format:rd, rj, rk.  */
@@ -169,7 +169,7 @@ extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 __crc_w_w_w (int _1, int _2)
 {
-  return (int) __builtin_loongarch_crc_w_w_w ((int) _1, (int) _2);
+  return __builtin_loongarch_crc_w_w_w (_1, _2);
 }
 
 #ifdef __loongarch64
@@ -179,7 +179,7 @@ extern __inline int
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 __crc_w_d_w (long int _1, int _2)
 {
-  return (int) __builtin_loongarch_crc_w_d_w ((long int) _1, (int) _2);
+  return 

[PATCH 1/2] LoongArch: Fix wrong return value type of __iocsrrd_h.

2024-02-05 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/larchintrin.h (__iocsrrd_h): Modify the
function return value type to unsigned short.
---
 gcc/config/loongarch/larchintrin.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/larchintrin.h 
b/gcc/config/loongarch/larchintrin.h
index ff2c9f460ac..04672e71728 100644
--- a/gcc/config/loongarch/larchintrin.h
+++ b/gcc/config/loongarch/larchintrin.h
@@ -268,7 +268,7 @@ __iocsrrd_b (unsigned int _1)
 
 /* Assembly instruction format:rd, rj.  */
 /* Data types in instruction templates:  UHI, USI.  */
-extern __inline unsigned char
+extern __inline unsigned short
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 __iocsrrd_h (unsigned int _1)
 {
-- 
2.39.3



[PATCH v2] LoongArch: libsanitizer: Enable Lsan and Tsan for loongarch64.

2024-02-03 Thread Lulu Cheng
From: chenguoqi 

libsanitizer/ChangeLog:

* configure.tgt: Enable tsan and lsan for loongarch64.
* tsan/Makefile.am (EXTRA_libtsan_la_SOURCES): Add
tsan_rtl_loongarch64.S.
* tsan/Makefile.in: Regenerate.
---
 libsanitizer/configure.tgt| 5 +
 libsanitizer/tsan/Makefile.am | 2 +-
 libsanitizer/tsan/Makefile.in | 3 ++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/libsanitizer/configure.tgt b/libsanitizer/configure.tgt
index 38fc7001ff7..77a0e68222b 100644
--- a/libsanitizer/configure.tgt
+++ b/libsanitizer/configure.tgt
@@ -79,6 +79,11 @@ case "${target}" in
fi
;;
   loongarch64-*-linux*)
+   if test x$ac_cv_sizeof_void_p = x8; then
+   TSAN_SUPPORTED=yes
+   LSAN_SUPPORTED=yes
+   TSAN_TARGET_DEPENDENT_OBJECTS=tsan_rtl_loongarch64.lo
+   fi
;;
   *)
UNSUPPORTED=1
diff --git a/libsanitizer/tsan/Makefile.am b/libsanitizer/tsan/Makefile.am
index cb8bf2e705e..e8fca16be5f 100644
--- a/libsanitizer/tsan/Makefile.am
+++ b/libsanitizer/tsan/Makefile.am
@@ -50,7 +50,7 @@ tsan_files = \
tsan_vector_clock.cpp
 
 libtsan_la_SOURCES = $(tsan_files)
-EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S 
tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_s390x.S tsan_rtl_riscv64.S
+EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S 
tsan_rtl_loongarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_s390x.S 
tsan_rtl_riscv64.S
 libtsan_la_LIBADD = $(top_builddir)/sanitizer_common/libsanitizer_common.la 
$(top_builddir)/interception/libinterception.la $(TSAN_TARGET_DEPENDENT_OBJECTS)
 libtsan_la_DEPENDENCIES = 
$(top_builddir)/sanitizer_common/libsanitizer_common.la 
$(top_builddir)/interception/libinterception.la $(TSAN_TARGET_DEPENDENT_OBJECTS)
 if LIBBACKTRACE_SUPPORTED
diff --git a/libsanitizer/tsan/Makefile.in b/libsanitizer/tsan/Makefile.in
index 5cc6f95a40a..5bbdf3915b8 100644
--- a/libsanitizer/tsan/Makefile.in
+++ b/libsanitizer/tsan/Makefile.in
@@ -456,7 +456,7 @@ tsan_files = \
tsan_vector_clock.cpp
 
 libtsan_la_SOURCES = $(tsan_files)
-EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S 
tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_s390x.S tsan_rtl_riscv64.S
+EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S 
tsan_rtl_loongarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_s390x.S 
tsan_rtl_riscv64.S
 libtsan_la_LIBADD =  \
$(top_builddir)/sanitizer_common/libsanitizer_common.la \
$(top_builddir)/interception/libinterception.la \
@@ -614,6 +614,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/tsan_rtl_aarch64.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_rtl_access.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_rtl_amd64.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/tsan_rtl_loongarch64.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_rtl_mips64.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_rtl_mutex.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_rtl_ppc64.Plo@am__quote@
-- 
2.39.3



[PATCH v2] LoongArch: Modify the address calculation logic for obtaining array element values through fp.

2024-01-29 Thread Lulu Cheng
Modify the address calculation logic from (((a x C) + fp) + offset) to
((fp + offset) + a x C).
This changes the register dependencies and optimizes the generated code.
The value of C is 2, 4 or 8.

The following is the assembly code before and after a loop modification in 
spec2006 401.bzip:

 old  | new
 735 .L71:|  735 .L71:
 736 slli.d  $r12,$r15,2  |  736 slli.d  $r12,$r15,2
 737 ldx.w   $r13,$r22,$r12   |  737 ldx.w   $r13,$r22,$r12
 738 addi.d  $r15,$r15,-1 |  738 addi.d  $r15,$r15,-1
 739 slli.w  $r16,$r15,0  |  739 slli.w  $r16,$r15,0
 740 addi.w  $r13,$r13,-1 |  740 addi.w  $r13,$r13,-1
 741 slti$r14,$r13,0  |  741 slti$r14,$r13,0
 742 add.w   $r12,$r26,$r13   |  742 add.w   $r12,$r26,$r13
 743 maskeqz $r12,$r12,$r14   |  743 maskeqz $r12,$r12,$r14
 744 masknez $r14,$r13,$r14   |  744 masknez $r14,$r13,$r14
 745 or  $r12,$r12,$r14   |  745 or  $r12,$r12,$r14
 746 ldx.bu  $r14,$r30,$r12   |  746 ldx.bu  $r14,$r30,$r12
 747 lu12i.w $r13,4096>>12|  747 alsl.d  
$r14,$r14,$r18,2
 748 ori $r13,$r13,432|  748 ldptr.w $r13,$r14,0
 749 add.d   $r13,$r13,$r3|  749 addi.w  $r17,$r13,-1
 750 alsl.d  $r14,$r14,$r13,2 |  750 stptr.w $r17,$r14,0
 751 ldptr.w $r13,$r14,-1968  |  751 slli.d  $r13,$r13,2
 752 addi.w  $r17,$r13,-1 |  752 stx.w   $r12,$r22,$r13
 753 st.w$r17,$r14,-1968  |  753 ldptr.w $r12,$r19,0
 754 slli.d  $r13,$r13,2  |  754 blt $r12,$r16,.L71
 755 stx.w   $r12,$r22,$r13   |  755 .align  4
 756 ldptr.w $r12,$r18,-2048  |  756
 757 blt $r12,$r16,.L71   |  757
 758 .align  4|  758

This patch is ported from riscv's commit r14-3111.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (mem_shadd_or_shadd_rtx_p): New 
function.
(loongarch_legitimize_address): Add logical transformation code.

---
v1 -> v2:
  Modify code format and comment information.

---
 gcc/config/loongarch/loongarch.cc | 43 +++
 1 file changed, 43 insertions(+)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index b494040d165..b8f6f6689bb 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3219,6 +3219,22 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode 
mode, rtx *low_out)
   return true;
 }
 
+/* Helper loongarch_legitimize_address.  Given X, return true if it
+   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
+
+   This respectively represent canonical shift-add rtxs or scaled
+   memory addresses.  */
+static bool
+mem_shadd_or_shadd_rtx_p (rtx x)
+{
+  return ((GET_CODE (x) == ASHIFT
+  || GET_CODE (x) == MULT)
+ && CONST_INT_P (XEXP (x, 1))
+ && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
+ || (GET_CODE (x) == MULT
+ && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
+}
+
 /* This function is used to implement LEGITIMIZE_ADDRESS.  If X can
be legitimized in a way that the generic machinery might not expect,
return a new address, otherwise return NULL.  MODE is the mode of
@@ -3242,6 +3258,33 @@ loongarch_legitimize_address (rtx x, rtx oldx 
ATTRIBUTE_UNUSED,
   loongarch_split_plus (x, &base, &offset);
   if (offset != 0)
 {
+  /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case.  
*/
+  if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
+ && IMM12_OPERAND (offset))
+   {
+ rtx index = XEXP (base, 0);
+ rtx fp = XEXP (base, 1);
+
+ if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
+   {
+ /* If we were given a MULT, we must fix the constant
+as we're going to create the ASHIFT form.  */
+ int shift_val = INTVAL (XEXP (index, 1));
+ if (GET_CODE (index) == MULT)
+   shift_val = exact_log2 (shift_val);
+
+ rtx reg1 = gen_reg_rtx (Pmode);
+ rtx reg3 = gen_reg_rtx (Pmode);
+ loongarch_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
+ loongarch_emit_binary (PLUS, reg3,
+gen_rtx_ASHIFT (Pmode, XEXP (index, 0),
+GEN_INT (shift_val)),
+reg1);
+
+ return reg3;
+   }
+   }
+
   if (!loongarch_valid_base_register_p (base, mode, false))
base = 

[PATCH] LoongArch: Modify the address calculation logic for obtaining array element values through fp.

2024-01-29 Thread Lulu Cheng
Modify the address calculation logic from (((a x C) + fp) + offset) to
((fp + offset) + a x C).
This changes the register dependencies and optimizes the generated code.
The value of C is 2, 4 or 8.

The following is the assembly code before and after a loop modification in 
spec2006 401.bzip:

 old  | new
 735 .L71:|  735 .L71:
 736 slli.d  $r12,$r15,2  |  736 slli.d  $r12,$r15,2
 737 ldx.w   $r13,$r22,$r12   |  737 ldx.w   $r13,$r22,$r12
 738 addi.d  $r15,$r15,-1 |  738 addi.d  $r15,$r15,-1
 739 slli.w  $r16,$r15,0  |  739 slli.w  $r16,$r15,0
 740 addi.w  $r13,$r13,-1 |  740 addi.w  $r13,$r13,-1
 741 slti$r14,$r13,0  |  741 slti$r14,$r13,0
 742 add.w   $r12,$r26,$r13   |  742 add.w   $r12,$r26,$r13
 743 maskeqz $r12,$r12,$r14   |  743 maskeqz $r12,$r12,$r14
 744 masknez $r14,$r13,$r14   |  744 masknez $r14,$r13,$r14
 745 or  $r12,$r12,$r14   |  745 or  $r12,$r12,$r14
 746 ldx.bu  $r14,$r30,$r12   |  746 ldx.bu  $r14,$r30,$r12
 747 lu12i.w $r13,4096>>12|  747 alsl.d  
$r14,$r14,$r18,2
 748 ori $r13,$r13,432|  748 ldptr.w $r13,$r14,0
 749 add.d   $r13,$r13,$r3|  749 addi.w  $r17,$r13,-1
 750 alsl.d  $r14,$r14,$r13,2 |  750 stptr.w $r17,$r14,0
 751 ldptr.w $r13,$r14,-1968  |  751 slli.d  $r13,$r13,2
 752 addi.w  $r17,$r13,-1 |  752 stx.w   $r12,$r22,$r13
 753 st.w$r17,$r14,-1968  |  753 ldptr.w $r12,$r19,0
 754 slli.d  $r13,$r13,2  |  754 blt $r12,$r16,.L71
 755 stx.w   $r12,$r22,$r13   |  755 .align  4
 756 ldptr.w $r12,$r18,-2048  |  756
 757 blt $r12,$r16,.L71   |  757
 758 .align  4|  758

This patch is ported from riscv's commit r14-3111.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (mem_shadd_or_shadd_rtx_p): New 
function.
(loongarch_legitimize_address): Add logical transformation code.
---
 gcc/config/loongarch/loongarch.cc | 40 +++
 1 file changed, 40 insertions(+)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index b494040d165..62e74207042 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3219,6 +3219,22 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode 
mode, rtx *low_out)
   return true;
 }
 
+/* Helper for riscv_legitimize_address. Given X, return true if it
+   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
+
+   This respectively represent canonical shift-add rtxs or scaled
+   memory addresses.  */
+static bool
+mem_shadd_or_shadd_rtx_p (rtx x)
+{
+  return ((GET_CODE (x) == ASHIFT
+  || GET_CODE (x) == MULT)
+ && CONST_INT_P (XEXP (x, 1))
+ && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
+ || (GET_CODE (x) == MULT
+ && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
+}
+
 /* This function is used to implement LEGITIMIZE_ADDRESS.  If X can
be legitimized in a way that the generic machinery might not expect,
return a new address, otherwise return NULL.  MODE is the mode of
@@ -3242,6 +3258,30 @@ loongarch_legitimize_address (rtx x, rtx oldx 
ATTRIBUTE_UNUSED,
   loongarch_split_plus (x, &base, &offset);
   if (offset != 0)
 {
+  /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case.  
*/
+  if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
+ && IMM12_OPERAND (offset))
+   {
+ rtx index = XEXP (base, 0);
+ rtx fp = XEXP (base, 1);
+
+ if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
+   {
+ /* If we were given a MULT, we must fix the constant
+as we're going to create the ASHIFT form.  */
+ int shift_val = INTVAL (XEXP (index, 1));
+ if (GET_CODE (index) == MULT)
+   shift_val = exact_log2 (shift_val);
+
+ rtx reg1 = gen_reg_rtx (Pmode);
+ rtx reg3 = gen_reg_rtx (Pmode);
+ loongarch_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
+ loongarch_emit_binary (PLUS, reg3, gen_rtx_ASHIFT (Pmode, XEXP 
(index, 0), GEN_INT (shift_val)), reg1);
+
+ return reg3;
+   }
+   }
+
   if (!loongarch_valid_base_register_p (base, mode, false))
base = copy_to_mode_reg (Pmode, base);
   addr = loongarch_add_offset (NULL, base, offset);
-- 
2.39.3



[PATCH] LoongArch: libsanitizer: Enable build lsan and tsan for loongarch64.

2024-01-29 Thread Lulu Cheng
From: chenguoqi 

libsanitizer/ChangeLog:

* configure.tgt: Enable tsan and lsan for loongarch64.
* tsan/Makefile.am: Add tsan_rtl_loongarch64.S to 
EXTRA_libtsan_la_SOURCES.
* tsan/Makefile.in: Regenerate.
---
 libsanitizer/configure.tgt| 5 +
 libsanitizer/tsan/Makefile.am | 2 +-
 libsanitizer/tsan/Makefile.in | 3 ++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/libsanitizer/configure.tgt b/libsanitizer/configure.tgt
index 38fc7001ff7..77a0e68222b 100644
--- a/libsanitizer/configure.tgt
+++ b/libsanitizer/configure.tgt
@@ -79,6 +79,11 @@ case "${target}" in
fi
;;
   loongarch64-*-linux*)
+   if test x$ac_cv_sizeof_void_p = x8; then
+   TSAN_SUPPORTED=yes
+   LSAN_SUPPORTED=yes
+   TSAN_TARGET_DEPENDENT_OBJECTS=tsan_rtl_loongarch64.lo
+   fi
;;
   *)
UNSUPPORTED=1
diff --git a/libsanitizer/tsan/Makefile.am b/libsanitizer/tsan/Makefile.am
index cb8bf2e705e..e8fca16be5f 100644
--- a/libsanitizer/tsan/Makefile.am
+++ b/libsanitizer/tsan/Makefile.am
@@ -50,7 +50,7 @@ tsan_files = \
tsan_vector_clock.cpp
 
 libtsan_la_SOURCES = $(tsan_files)
-EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S 
tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_s390x.S tsan_rtl_riscv64.S
+EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S 
tsan_rtl_loongarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_s390x.S 
tsan_rtl_riscv64.S
 libtsan_la_LIBADD = $(top_builddir)/sanitizer_common/libsanitizer_common.la 
$(top_builddir)/interception/libinterception.la $(TSAN_TARGET_DEPENDENT_OBJECTS)
 libtsan_la_DEPENDENCIES = 
$(top_builddir)/sanitizer_common/libsanitizer_common.la 
$(top_builddir)/interception/libinterception.la $(TSAN_TARGET_DEPENDENT_OBJECTS)
 if LIBBACKTRACE_SUPPORTED
diff --git a/libsanitizer/tsan/Makefile.in b/libsanitizer/tsan/Makefile.in
index 5cc6f95a40a..5bbdf3915b8 100644
--- a/libsanitizer/tsan/Makefile.in
+++ b/libsanitizer/tsan/Makefile.in
@@ -456,7 +456,7 @@ tsan_files = \
tsan_vector_clock.cpp
 
 libtsan_la_SOURCES = $(tsan_files)
-EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S 
tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_s390x.S tsan_rtl_riscv64.S
+EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S 
tsan_rtl_loongarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_s390x.S 
tsan_rtl_riscv64.S
 libtsan_la_LIBADD =  \
$(top_builddir)/sanitizer_common/libsanitizer_common.la \
$(top_builddir)/interception/libinterception.la \
@@ -614,6 +614,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/tsan_rtl_aarch64.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_rtl_access.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_rtl_amd64.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/tsan_rtl_loongarch64.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_rtl_mips64.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_rtl_mutex.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tsan_rtl_ppc64.Plo@am__quote@
-- 
2.39.3



[PATCH v5 1/5] LoongArch: Merge template got_load_tls_{ld/gd/le/ie}.

2024-01-29 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_load_tls):
Load all types of tls symbols through one function.
(loongarch_got_load_tls_gd): Delete.
(loongarch_got_load_tls_ld): Delete.
(loongarch_got_load_tls_ie): Delete.
(loongarch_got_load_tls_le): Delete.
(loongarch_call_tls_get_addr): Modify the called function name.
(loongarch_legitimize_tls_address): Likewise.
* config/loongarch/loongarch.md (@got_load_tls_gd): Delete.
(@load_tls): New template.
(@got_load_tls_ld): Delete.
(@got_load_tls_le): Delete.
(@got_load_tls_ie): Delete.
---
 gcc/config/loongarch/loongarch.cc | 47 +---
 gcc/config/loongarch/loongarch.md | 59 ---
 2 files changed, 30 insertions(+), 76 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index b494040d165..7b4edf1c1fd 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2736,36 +2736,12 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT 
offset)
 /* The __tls_get_attr symbol.  */
 static GTY (()) rtx loongarch_tls_symbol;
 
-/* Load an entry from the GOT for a TLS GD access.  */
+/* Load an entry for a TLS access.  */
 
 static rtx
-loongarch_got_load_tls_gd (rtx dest, rtx sym)
+loongarch_load_tls (rtx dest, rtx sym)
 {
-  return gen_got_load_tls_gd (Pmode, dest, sym);
-}
-
-/* Load an entry from the GOT for a TLS LD access.  */
-
-static rtx
-loongarch_got_load_tls_ld (rtx dest, rtx sym)
-{
-  return gen_got_load_tls_ld (Pmode, dest, sym);
-}
-
-/* Load an entry from the GOT for a TLS IE access.  */
-
-static rtx
-loongarch_got_load_tls_ie (rtx dest, rtx sym)
-{
-  return gen_got_load_tls_ie (Pmode, dest, sym);
-}
-
-/* Add in the thread pointer for a TLS LE access.  */
-
-static rtx
-loongarch_got_load_tls_le (rtx dest, rtx sym)
-{
-  return gen_got_load_tls_le (Pmode, dest, sym);
+  return gen_load_tls (Pmode, dest, sym);
 }
 
 /* Return an instruction sequence that calls __tls_get_addr.  SYM is
@@ -2809,14 +2785,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
emit_insn (gen_tls_low (Pmode, a0, high, loc));
 }
   else
-{
-  if (type == SYMBOL_TLSLDM)
-   emit_insn (loongarch_got_load_tls_ld (a0, loc));
-  else if (type == SYMBOL_TLSGD)
-   emit_insn (loongarch_got_load_tls_gd (a0, loc));
-  else
-   gcc_unreachable ();
-}
+emit_insn (loongarch_load_tls (a0, loc));
 
   if (flag_plt)
 {
@@ -2953,10 +2922,10 @@ loongarch_legitimize_tls_address (rtx loc)
  /* la.tls.ie; tp-relative add.  */
  tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
  tmp1 = gen_reg_rtx (Pmode);
+ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
  dest = gen_reg_rtx (Pmode);
  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
  high = loongarch_force_temporary (tmp3, high);
@@ -2979,7 +2948,7 @@ loongarch_legitimize_tls_address (rtx loc)
emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
}
  else
-   emit_insn (loongarch_got_load_tls_ie (tmp1, loc));
+   emit_insn (loongarch_load_tls (tmp1, tmp2));
  emit_insn (gen_add3_insn (dest, tmp1, tp));
}
   break;
@@ -3011,11 +2980,11 @@ loongarch_legitimize_tls_address (rtx loc)
 
  tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
  tmp1 = gen_reg_rtx (Pmode);
+ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
  dest = gen_reg_rtx (Pmode);
 
  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
  high = loongarch_force_temporary (tmp3, high);
@@ -3043,7 +3012,7 @@ loongarch_legitimize_tls_address (rtx loc)
}
}
  else
-   emit_insn (loongarch_got_load_tls_le (tmp1, loc));
+   emit_insn (loongarch_load_tls (tmp1, tmp2));
  emit_insn (gen_add3_insn (dest, tmp1, tp));
}
   break;
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index dda3cdf8be5..231c6568c85 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -51,10 +51,7 @@ (define_c_enum "unspec" [
   UNSPEC_BITREV_8B
 
   ;; TLS
-  UNSPEC_TLS_GD
-  UNSPEC_TLS_LD
-  UNSPEC_TLS_LE
-  UNSPEC_TLS_IE
+  UNSPEC_TLS
 
   ;; Stack tie
   UNSPEC_TIE
@@ -2701,45 +2698,33 @@ (define_insn "store_word"
 
 ;; Thread-Local Storage
 
-(define_insn "@got_load_tls_gd"
+(define_insn 

[PATCH v5 5/5] LoongArch: Don't split the instructions containing relocs for extreme code model.

2024-01-29 Thread Lulu Cheng
From: Xi Ruoyao 

The ABI mandates the pcalau12i/addi.d/lu32i.d/lu52i.d instructions for
addressing a symbol to be adjacent.  So model them as "one large
instruction", i.e. define_insn, with two output registers.  The real
address is the sum of these two registers.

The advantage of this approach is the RTL passes can still use ldx/stx
instructions to skip an addi.d instruction.

gcc/ChangeLog:

* config/loongarch/loongarch.md (unspec): Add
UNSPEC_LA_PCREL_64_PART1 and UNSPEC_LA_PCREL_64_PART2.
(la_pcrel64_two_parts): New define_insn.
* config/loongarch/loongarch.cc (loongarch_tls_symbol): Fix a
typo in the comment.
(loongarch_call_tls_get_addr): If -mcmodel=extreme
-mexplicit-relocs={always,auto}, use la_pcrel64_two_parts for
addressing the TLS symbol and __tls_get_addr.  Emit an REG_EQUAL
note to allow CSE addressing __tls_get_addr.
(loongarch_legitimize_tls_address): If -mcmodel=extreme
-mexplicit-relocs={always,auto}, address TLS IE symbols with
la_pcrel64_two_parts.
(loongarch_split_symbol): If -mcmodel=extreme
-mexplicit-relocs={always,auto}, address symbols with
la_pcrel64_two_parts.
(loongarch_output_mi_thunk): Clean up unreachable code.  If
-mcmodel=extreme -mexplicit-relocs={always,auto}, address the MI
thunks with la_pcrel64_two_parts.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/func-call-extreme-1.c (dg-options):
Use -O2 instead of -O0 to ensure the pcalau12i/addi/lu32i/lu52i
instruction sequences are not reordered by the compiler.
(NOIPA): Disallow interprocedural optimizations.
* gcc.target/loongarch/func-call-extreme-2.c: Remove the content
duplicated from func-call-extreme-1.c, include it instead.
(dg-options): Likewise.
* gcc.target/loongarch/func-call-extreme-3.c (dg-options):
Likewise.
* gcc.target/loongarch/func-call-extreme-4.c (dg-options):
Likewise.
* gcc.target/loongarch/cmodel-extreme-1.c: New test.
* gcc.target/loongarch/cmodel-extreme-2.c: New test.
* g++.target/loongarch/cmodel-extreme-mi-thunk-1.C: New test.
* g++.target/loongarch/cmodel-extreme-mi-thunk-2.C: New test.
* g++.target/loongarch/cmodel-extreme-mi-thunk-3.C: New test.
---
 gcc/config/loongarch/loongarch.cc | 131 ++
 gcc/config/loongarch/loongarch.md |  20 +++
 .../loongarch/cmodel-extreme-mi-thunk-1.C |  11 ++
 .../loongarch/cmodel-extreme-mi-thunk-2.C |   6 +
 .../loongarch/cmodel-extreme-mi-thunk-3.C |   6 +
 .../gcc.target/loongarch/cmodel-extreme-1.c   |  18 +++
 .../gcc.target/loongarch/cmodel-extreme-2.c   |   7 +
 .../loongarch/func-call-extreme-1.c   |  14 +-
 .../loongarch/func-call-extreme-2.c   |  29 +---
 .../loongarch/func-call-extreme-3.c   |   2 +-
 .../loongarch/func-call-extreme-4.c   |   2 +-
 11 files changed, 154 insertions(+), 92 deletions(-)
 create mode 100644 
gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C
 create mode 100644 
gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C
 create mode 100644 
gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C
 create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 564de9c2642..89dd33553da 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2737,7 +2737,7 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT 
offset)
   return plus_constant (Pmode, reg, offset);
 }
 
-/* The __tls_get_attr symbol.  */
+/* The __tls_get_addr symbol.  */
 static GTY (()) rtx loongarch_tls_symbol;
 
 /* Load an entry for a TLS access.  */
@@ -2777,20 +2777,22 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
   if (loongarch_explicit_relocs_p (type))
 {
-  /* Split tls symbol to high and low.  */
-  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
-  high = loongarch_force_temporary (tmp, high);
-
   if (TARGET_CMODEL_EXTREME)
{
- rtx tmp1 = gen_reg_rtx (Pmode);
- emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc));
- emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc));
- emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc));
- emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1));
+ rtx part1 = gen_reg_rtx (Pmode);
+ rtx part2 = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_la_pcrel64_two_parts (part1, part2, loc));
+ emit_move_insn (a0, gen_rtx_PLUS (Pmode, part1, part2));
}
   else
-   emit_insn (gen_tls_low (Pmode, a0, high, loc));
+   {
+ /* Split tls symbol to high and low.  */
+ rtx high = 

[PATCH v5 3/5] LoongArch: Enable explicit reloc for extreme TLS GD/LD with -mexplicit-relocs=auto.

2024-01-29 Thread Lulu Cheng
Binutils does not support relaxation using four instructions to obtain
symbol addresses

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_explicit_relocs_p):
When the code model of the symbol is extreme and -mexplicit-relocs=auto,
the macro instruction loading symbol address is not applicable.
(loongarch_call_tls_get_addr): Adjust code.
(loongarch_legitimize_tls_address): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c: New 
test.
* gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c: New 
test.
---
 gcc/config/loongarch/loongarch.cc | 19 +--
 .../explicit-relocs-extreme-auto-tls-ld-gd.c  |  5 +
 .../explicit-relocs-medium-auto-tls-ld-gd.c   |  5 +
 3 files changed, 19 insertions(+), 10 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index a0c14f908a8..684ae81870c 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1971,6 +1971,10 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type 
type)
   if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
 return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;
 
+  /* The linker don't know how to relax accesses in extreme code model.  */
+  if (loongarch_symbol_extreme_p (type))
+return true;
+
   switch (type)
 {
   case SYMBOL_TLS_IE:
@@ -1982,11 +1986,6 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type 
type)
   does not relax 64-bit pc-relative accesses as at now.  */
return true;
   case SYMBOL_GOT_DISP:
-   /* The linker don't know how to relax GOT accesses in extreme
-  code model.  */
-   if (TARGET_CMODEL_EXTREME)
- return true;
-
/* If we are performing LTO for a final link, and we have the
   linker plugin so we know the resolution of the symbols, then
   all GOT references are binding to external symbols or
@@ -2776,7 +2775,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
   start_sequence ();
 
-  if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
+  if (loongarch_explicit_relocs_p (type))
 {
   /* Split tls symbol to high and low.  */
   rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
@@ -2809,7 +2808,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
case CMODEL_MEDIUM:
{
  rtx reg = gen_reg_rtx (Pmode);
- if (TARGET_EXPLICIT_RELOCS)
+ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
  emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol));
  rtx call = gen_call_value_internal_1 (Pmode, v0, reg,
@@ -2845,7 +2844,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
case CMODEL_NORMAL:
case CMODEL_MEDIUM:
{
- if (TARGET_EXPLICIT_RELOCS)
+ if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP))
{
  rtx high = gen_reg_rtx (Pmode);
  loongarch_emit_move (high,
@@ -2939,7 +2938,7 @@ loongarch_legitimize_tls_address (rtx loc)
  tmp1 = gen_reg_rtx (Pmode);
  tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
  dest = gen_reg_rtx (Pmode);
- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
+ if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE))
{
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
@@ -2996,7 +2995,7 @@ loongarch_legitimize_tls_address (rtx loc)
  tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
  dest = gen_reg_rtx (Pmode);
 
- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
+ if (loongarch_explicit_relocs_p (SYMBOL_TLS_LE))
{
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
new file mode 100644
index 000..35bd4570a9e
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=extreme -fno-plt" } 
*/
+/* { dg-final { scan-assembler-not "la.tls.\[lg\]d" { target tls_native } } } 
*/
+
+#include "./explicit-relocs-auto-tls-ld-gd.c"
diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c 

[PATCH v5 2/5] LoongArch: Add the macro implementation of mcmodel=extreme.

2024-01-29 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch-protos.h (loongarch_symbol_extreme_p):
Add function declaration.
* config/loongarch/loongarch.cc (loongarch_symbolic_constant_p):
For SYMBOL_PCREL64, non-zero addend of "la.local $rd,$rt,sym+addend"
is not allowed
(loongarch_load_tls): Added macro support in extreme mode.
(loongarch_call_tls_get_addr): Likewise.
(loongarch_legitimize_tls_address): Likewise.
(loongarch_force_address): Likewise.
(loongarch_legitimize_move): Likewise.
(loongarch_output_mi_thunk): Likewise.
(loongarch_option_override_internal): Remove the code that detects
explicit relocs status.
(loongarch_handle_model_attribute): Likewise.
* config/loongarch/loongarch.md (movdi_symbolic_off64): New template.
* config/loongarch/predicates.md (symbolic_off64_operand): New 
predicate.
(symbolic_off64_or_reg_operand): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/attr-model-5.c: New test.
* gcc.target/loongarch/func-call-extreme-5.c: New test.
* gcc.target/loongarch/func-call-extreme-6.c: New test.
* gcc.target/loongarch/tls-extreme-macro.c: New test.
---
 gcc/config/loongarch/loongarch-protos.h   |   1 +
 gcc/config/loongarch/loongarch.cc | 110 +++---
 gcc/config/loongarch/loongarch.md |  48 +++-
 gcc/config/loongarch/predicates.md|  12 ++
 .../gcc.target/loongarch/attr-model-5.c   |   8 ++
 .../loongarch/func-call-extreme-5.c   |   7 ++
 .../loongarch/func-call-extreme-6.c   |   7 ++
 .../gcc.target/loongarch/tls-extreme-macro.c  |  35 ++
 8 files changed, 184 insertions(+), 44 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index 9ffc92afead..1fdfda9af01 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -222,4 +222,5 @@ extern rtx loongarch_build_signbit_mask (machine_mode, 
bool, bool);
 extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool);
 extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode);
 extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type);
+extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type);
 #endif /* ! GCC_LOONGARCH_PROTOS_H */
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 7b4edf1c1fd..a0c14f908a8 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1935,8 +1935,13 @@ loongarch_symbolic_constant_p (rtx x, enum 
loongarch_symbol_type *symbol_type)
  relocations.  */
   switch (*symbol_type)
 {
-case SYMBOL_PCREL:
 case SYMBOL_PCREL64:
+  /* When the code model is extreme, the non-zero offset situation
+has not been handled well, so it is disabled here now.  */
+  if (!loongarch_explicit_relocs_p (SYMBOL_PCREL64))
+   return false;
+/* fall through */
+case SYMBOL_PCREL:
   /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
   return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
 
@@ -2739,9 +2744,15 @@ static GTY (()) rtx loongarch_tls_symbol;
 /* Load an entry for a TLS access.  */
 
 static rtx
-loongarch_load_tls (rtx dest, rtx sym)
+loongarch_load_tls (rtx dest, rtx sym, enum loongarch_symbol_type type)
 {
-  return gen_load_tls (Pmode, dest, sym);
+  /* TLS LE gets a 32 or 64 bit offset here, so one register can do it.  */
+  if (type == SYMBOL_TLS_LE)
+return gen_load_tls (Pmode, dest, sym);
+
+  return loongarch_symbol_extreme_p (type)
+? gen_movdi_symbolic_off64 (dest, sym, gen_reg_rtx (DImode))
+: gen_load_tls (Pmode, dest, sym);
 }
 
 /* Return an instruction sequence that calls __tls_get_addr.  SYM is
@@ -2773,8 +2784,6 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
   if (TARGET_CMODEL_EXTREME)
{
- gcc_assert (TARGET_EXPLICIT_RELOCS);
-
  rtx tmp1 = gen_reg_rtx (Pmode);
  emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc));
  emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc));
@@ -2785,7 +2794,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
emit_insn (gen_tls_low (Pmode, a0, high, loc));
 }
   else
-emit_insn (loongarch_load_tls (a0, loc));
+emit_insn (loongarch_load_tls (a0, loc, type));
 
   if (flag_plt)
 {
@@ -2852,22 +2861,28 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
case CMODEL_EXTREME:

[PATCH v5 0/5] When cmodel=extreme, add macro implementation and fix problems with explicit relocs implementation.

2024-01-29 Thread Lulu Cheng
When cmodel=extreme, the symbol address is obtained through a sequence of
four instructions, so errors may occur in some cases during linking.
Xi Ruoyao's patch in this series fixes this problem.

https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc#extreme-code-model


v4 -> v5:
  1. Modify code format.
  2. Add the implementation patch submitted by Xi Ruoyao about 
'-mcmodel=extreme -mexplicit-relocs=always'.

v3 -> v4:
  1. Add macro support for TLS symbols.
  2. Add support for loading the __tls_get_addr symbol address using call36.
  3. Merge template got_load_tls_{ld/gd/le/ie}.
  4. Enable explicit reloc for extreme TLS GD/LD with -mexplicit-relocs=auto.


v2 -> v3:
  1. Modify the detection rules of a test case.

v1 -> v2:
  1. Use the temporarily allocated registers as intermediate registers to 
implement the extreme macro.
  2. Fixed bugs in v1 test cases.



Lulu Cheng (4):
  LoongArch: Merge template got_load_tls_{ld/gd/le/ie}.
  LoongArch: Add the macro implementation of mcmodel=extreme.
  LoongArch: Enable explicit reloc for extreme TLS GD/LD with
-mexplicit-relocs=auto.
  LoongArch: Added support for loading __get_tls_addr symbol address
using call36.

Xi Ruoyao (1):
  LoongArch: Don't split the instructions containing relocs for extreme
code model.

 gcc/config/loongarch/loongarch-protos.h   |   1 +
 gcc/config/loongarch/loongarch.cc | 265 ++
 gcc/config/loongarch/loongarch.md | 125 ++---
 gcc/config/loongarch/predicates.md|  12 +
 .../loongarch/cmodel-extreme-mi-thunk-1.C |  11 +
 .../loongarch/cmodel-extreme-mi-thunk-2.C |   6 +
 .../loongarch/cmodel-extreme-mi-thunk-3.C |   6 +
 .../gcc.target/loongarch/attr-model-5.c   |   8 +
 .../gcc.target/loongarch/cmodel-extreme-1.c   |  18 ++
 .../gcc.target/loongarch/cmodel-extreme-2.c   |   7 +
 .../explicit-relocs-extreme-auto-tls-ld-gd.c  |   5 +
 .../explicit-relocs-medium-auto-tls-ld-gd.c   |   5 +
 ...icit-relocs-medium-call36-auto-tls-ld-gd.c |   5 +
 .../loongarch/func-call-extreme-1.c   |  14 +-
 .../loongarch/func-call-extreme-2.c   |  29 +-
 .../loongarch/func-call-extreme-3.c   |   2 +-
 .../loongarch/func-call-extreme-4.c   |   2 +-
 .../loongarch/func-call-extreme-5.c   |   7 +
 .../loongarch/func-call-extreme-6.c   |   7 +
 .../gcc.target/loongarch/tls-extreme-macro.c  |  35 +++
 20 files changed, 375 insertions(+), 195 deletions(-)
 create mode 100644 
gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-1.C
 create mode 100644 
gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-2.C
 create mode 100644 
gcc/testsuite/g++.target/loongarch/cmodel-extreme-mi-thunk-3.C
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/cmodel-extreme-2.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c

-- 
2.39.3



[PATCH v5 4/5] LoongArch: Added support for loading __tls_get_addr symbol address using call36.

2024-01-29 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
Add support for call36.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c: 
New test.
---
 gcc/config/loongarch/loongarch.cc | 22 ++-
 ...icit-relocs-medium-call36-auto-tls-ld-gd.c |  5 +
 2 files changed, 21 insertions(+), 6 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 684ae81870c..564de9c2642 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2807,17 +2807,27 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
case CMODEL_MEDIUM:
{
- rtx reg = gen_reg_rtx (Pmode);
  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
- emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol));
- rtx call = gen_call_value_internal_1 (Pmode, v0, reg,
-   loongarch_tls_symbol,
-   const0_rtx);
- insn = emit_call_insn (call);
+ rtx call;
+
+if (HAVE_AS_SUPPORT_CALL36)
+  call = gen_call_value_internal (v0, loongarch_tls_symbol,
+  const0_rtx);
+else
+  {
+rtx reg = gen_reg_rtx (Pmode);
+emit_insn (gen_pcalau12i (Pmode, reg,
+  loongarch_tls_symbol));
+call = gen_call_value_internal_1 (Pmode, v0, reg,
+  loongarch_tls_symbol,
+  const0_rtx);
+  }
+insn = emit_call_insn (call);
}
  else
{
+ rtx reg = gen_reg_rtx (Pmode);
  emit_move_insn (reg, loongarch_tls_symbol);
  insn = emit_call_insn (gen_call_value_internal (v0,
  reg,
diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
new file mode 100644
index 000..d1a4820834c
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */
+/* { dg-final { scan-assembler 
"pcaddu18i\t\\\$r1,%call36\\\(__tls_get_addr\\\)" { target { tls_native && 
loongarch_call36_support } } } } */
+
+#include "./explicit-relocs-auto-tls-ld-gd.c"
-- 
2.39.3



[PATCH v4 1/4] LoongArch: Merge template got_load_tls_{ld/gd/le/ie}.

2024-01-25 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_load_tls):
Load all types of tls symbols through one function.
(loongarch_got_load_tls_gd): Delete.
(loongarch_got_load_tls_ld): Delete.
(loongarch_got_load_tls_ie): Delete.
(loongarch_got_load_tls_le): Delete.
(loongarch_call_tls_get_addr): Modify the called function name.
(loongarch_legitimize_tls_address): Likewise.
* config/loongarch/loongarch.md (@got_load_tls_gd): Delete.
(@load_tls): New template.
(@got_load_tls_ld): Delete.
(@got_load_tls_le): Delete.
(@got_load_tls_ie): Delete.
---
 gcc/config/loongarch/loongarch.cc | 47 +---
 gcc/config/loongarch/loongarch.md | 59 ---
 2 files changed, 30 insertions(+), 76 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index dba1252c8f7..2f7de6f94d3 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2736,36 +2736,12 @@ loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT 
offset)
 /* The __tls_get_attr symbol.  */
 static GTY (()) rtx loongarch_tls_symbol;
 
-/* Load an entry from the GOT for a TLS GD access.  */
+/* Load an entry for a TLS access.  */
 
 static rtx
-loongarch_got_load_tls_gd (rtx dest, rtx sym)
+loongarch_load_tls (rtx dest, rtx sym)
 {
-  return gen_got_load_tls_gd (Pmode, dest, sym);
-}
-
-/* Load an entry from the GOT for a TLS LD access.  */
-
-static rtx
-loongarch_got_load_tls_ld (rtx dest, rtx sym)
-{
-  return gen_got_load_tls_ld (Pmode, dest, sym);
-}
-
-/* Load an entry from the GOT for a TLS IE access.  */
-
-static rtx
-loongarch_got_load_tls_ie (rtx dest, rtx sym)
-{
-  return gen_got_load_tls_ie (Pmode, dest, sym);
-}
-
-/* Add in the thread pointer for a TLS LE access.  */
-
-static rtx
-loongarch_got_load_tls_le (rtx dest, rtx sym)
-{
-  return gen_got_load_tls_le (Pmode, dest, sym);
+  return gen_load_tls (Pmode, dest, sym);
 }
 
 /* Return an instruction sequence that calls __tls_get_addr.  SYM is
@@ -2809,14 +2785,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
emit_insn (gen_tls_low (Pmode, a0, high, loc));
 }
   else
-{
-  if (type == SYMBOL_TLSLDM)
-   emit_insn (loongarch_got_load_tls_ld (a0, loc));
-  else if (type == SYMBOL_TLSGD)
-   emit_insn (loongarch_got_load_tls_gd (a0, loc));
-  else
-   gcc_unreachable ();
-}
+emit_insn (loongarch_load_tls (a0, loc));
 
   if (flag_plt)
 {
@@ -2953,10 +2922,10 @@ loongarch_legitimize_tls_address (rtx loc)
  /* la.tls.ie; tp-relative add.  */
  tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
  tmp1 = gen_reg_rtx (Pmode);
+ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
  dest = gen_reg_rtx (Pmode);
  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
  high = loongarch_force_temporary (tmp3, high);
@@ -2979,7 +2948,7 @@ loongarch_legitimize_tls_address (rtx loc)
emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
}
  else
-   emit_insn (loongarch_got_load_tls_ie (tmp1, loc));
+   emit_insn (loongarch_load_tls (tmp1, tmp2));
  emit_insn (gen_add3_insn (dest, tmp1, tp));
}
   break;
@@ -3011,11 +2980,11 @@ loongarch_legitimize_tls_address (rtx loc)
 
  tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
  tmp1 = gen_reg_rtx (Pmode);
+ tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
  dest = gen_reg_rtx (Pmode);
 
  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
- tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
  high = loongarch_force_temporary (tmp3, high);
@@ -3043,7 +3012,7 @@ loongarch_legitimize_tls_address (rtx loc)
}
}
  else
-   emit_insn (loongarch_got_load_tls_le (tmp1, loc));
+   emit_insn (loongarch_load_tls (tmp1, tmp2));
  emit_insn (gen_add3_insn (dest, tmp1, tp));
}
   break;
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index dda3cdf8be5..0b61b013798 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -51,10 +51,7 @@ (define_c_enum "unspec" [
   UNSPEC_BITREV_8B
 
   ;; TLS
-  UNSPEC_TLS_GD
-  UNSPEC_TLS_LD
-  UNSPEC_TLS_LE
-  UNSPEC_TLS_IE
+  UNSPEC_TLS
 
   ;; Stack tie
   UNSPEC_TIE
@@ -2701,45 +2698,33 @@ (define_insn "store_word"
 
 ;; Thread-Local Storage
 
-(define_insn "@got_load_tls_gd"
+(define_insn 

[PATCH v4 2/4] LoongArch: Add the macro implementation of mcmodel=extreme.

2024-01-25 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch-protos.h (loongarch_symbol_extreme_p):
Add function declaration.
* config/loongarch/loongarch.cc (loongarch_symbolic_constant_p):
For SYMBOL_PCREL64, non-zero addend of "la.local $rd,$rt,sym+addend"
is not allowed
(loongarch_load_tls): Added macro support in extreme mode.
(loongarch_call_tls_get_addr): Likewise.
(loongarch_legitimize_tls_address): Likewise.
(loongarch_force_address): Likewise.
(loongarch_legitimize_move): Likewise.
(loongarch_output_mi_thunk): Likewise.
(loongarch_option_override_internal): Remove the code that detects
explicit relocs status.
(loongarch_handle_model_attribute): Likewise.
* config/loongarch/loongarch.md (movdi_symbolic_off64): New template.
* config/loongarch/predicates.md (symbolic_off64_operand): New 
predicate.
(symbolic_off64_or_reg_operand): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/attr-model-5.c: New test.
* gcc.target/loongarch/func-call-extreme-5.c: New test.
* gcc.target/loongarch/func-call-extreme-6.c: New test.
* gcc.target/loongarch/tls-extreme-macro.c: New test.
---
 gcc/config/loongarch/loongarch-protos.h   |   1 +
 gcc/config/loongarch/loongarch.cc | 108 +++---
 gcc/config/loongarch/loongarch.md |  42 +++
 gcc/config/loongarch/predicates.md|  12 ++
 .../gcc.target/loongarch/attr-model-5.c   |   8 ++
 .../loongarch/func-call-extreme-5.c   |   7 ++
 .../loongarch/func-call-extreme-6.c   |   7 ++
 .../gcc.target/loongarch/tls-extreme-macro.c  |  35 ++
 8 files changed, 177 insertions(+), 43 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c

diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index 9ffc92afead..1fdfda9af01 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -222,4 +222,5 @@ extern rtx loongarch_build_signbit_mask (machine_mode, 
bool, bool);
 extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool);
 extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode);
 extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type);
+extern bool loongarch_symbol_extreme_p (enum loongarch_symbol_type);
 #endif /* ! GCC_LOONGARCH_PROTOS_H */
diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 2f7de6f94d3..4c64742f78b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1935,8 +1935,13 @@ loongarch_symbolic_constant_p (rtx x, enum 
loongarch_symbol_type *symbol_type)
  relocations.  */
   switch (*symbol_type)
 {
-case SYMBOL_PCREL:
 case SYMBOL_PCREL64:
+  /* When the code model is extreme, the non-zero offset situation
+has not been handled well, so it is disabled here now.  */
+  if (!loongarch_explicit_relocs_p (SYMBOL_PCREL64))
+   return false;
+/* fall through */
+case SYMBOL_PCREL:
   /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
   return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
 
@@ -2739,9 +2744,15 @@ static GTY (()) rtx loongarch_tls_symbol;
 /* Load an entry for a TLS access.  */
 
 static rtx
-loongarch_load_tls (rtx dest, rtx sym)
+loongarch_load_tls (rtx dest, rtx sym, enum loongarch_symbol_type type)
 {
-  return gen_load_tls (Pmode, dest, sym);
+  /* TLS LE gets a 32 or 64 bit offset here, so one register can do it.  */
+  if (type == SYMBOL_TLS_LE)
+return gen_load_tls (Pmode, dest, sym);
+
+  return loongarch_symbol_extreme_p (type) ?
+gen_movdi_symbolic_off64 (dest, sym, gen_reg_rtx (DImode))
+: gen_load_tls (Pmode, dest, sym);
 }
 
 /* Return an instruction sequence that calls __tls_get_addr.  SYM is
@@ -2773,8 +2784,6 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
   if (TARGET_CMODEL_EXTREME)
{
- gcc_assert (TARGET_EXPLICIT_RELOCS);
-
  rtx tmp1 = gen_reg_rtx (Pmode);
  emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc));
  emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc));
@@ -2785,7 +2794,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
emit_insn (gen_tls_low (Pmode, a0, high, loc));
 }
   else
-emit_insn (loongarch_load_tls (a0, loc));
+emit_insn (loongarch_load_tls (a0, loc, type));
 
   if (flag_plt)
 {
@@ -2852,22 +2861,26 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
case CMODEL_EXTREME:
 

[PATCH v4 4/4] LoongArch: Added support for loading __tls_get_addr symbol address using call36.

2024-01-25 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
Add support for call36.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c: 
New test.
---
 gcc/config/loongarch/loongarch.cc | 20 +--
 ...icit-relocs-medium-call36-auto-tls-ld-gd.c |  5 +
 2 files changed, 19 insertions(+), 6 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index b76e201c0ef..19bb37b0c04 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2807,17 +2807,25 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
case CMODEL_MEDIUM:
{
- rtx reg = gen_reg_rtx (Pmode);
  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
- emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol));
- rtx call = gen_call_value_internal_1 (Pmode, v0, reg,
-   loongarch_tls_symbol,
-   const0_rtx);
- insn = emit_call_insn (call);
+ rtx call;
+
+if (HAVE_AS_SUPPORT_CALL36)
+  call = gen_call_value_internal (v0, loongarch_tls_symbol, 
const0_rtx);
+else
+  {
+rtx reg = gen_reg_rtx (Pmode);
+emit_insn (gen_pcalau12i (Pmode, reg, 
loongarch_tls_symbol));
+call = gen_call_value_internal_1 (Pmode, v0, reg,
+  loongarch_tls_symbol,
+  const0_rtx);
+  }
+insn = emit_call_insn (call);
}
  else
{
+ rtx reg = gen_reg_rtx (Pmode);
  emit_move_insn (reg, loongarch_tls_symbol);
  insn = emit_call_insn (gen_call_value_internal (v0,
  reg,
diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
new file mode 100644
index 000..d1a4820834c
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=medium -fplt" } */
+/* { dg-final { scan-assembler 
"pcaddu18i\t\\\$r1,%call36\\\(__tls_get_addr\\\)" { target { tls_native && 
loongarch_call36_support } } } } */
+
+#include "./explicit-relocs-auto-tls-ld-gd.c"
-- 
2.39.3



[PATCH v4 3/4] LoongArch: Enable explicit reloc for extreme TLS GD/LD with -mexplicit-relocs=auto.

2024-01-25 Thread Lulu Cheng
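Binutils does not support relaxing the four-instruction sequence that is
used to obtain symbol addresses in the extreme code model, so with
-mexplicit-relocs=auto explicit relocs are used for such symbols instead
of the macro instructions.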

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_explicit_relocs_p):
When the code model of the symbol is extreme and -mexplicit-relocs=auto,
the macro instruction loading symbol address is not applicable.
(loongarch_call_tls_get_addr): Adjust code.
(loongarch_legitimize_tls_address): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c: New 
test.
* gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c: New 
test.
---
 gcc/config/loongarch/loongarch.cc | 19 +--
 .../explicit-relocs-extreme-auto-tls-ld-gd.c  |  5 +
 .../explicit-relocs-medium-auto-tls-ld-gd.c   |  5 +
 3 files changed, 19 insertions(+), 10 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 4c64742f78b..b76e201c0ef 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1971,6 +1971,10 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type 
type)
   if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
 return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;
 
+  /* The linker don't know how to relax accesses in extreme code model.  */
+  if (loongarch_symbol_extreme_p (type))
+return true;
+
   switch (type)
 {
   case SYMBOL_TLS_IE:
@@ -1982,11 +1986,6 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type 
type)
   does not relax 64-bit pc-relative accesses as at now.  */
return true;
   case SYMBOL_GOT_DISP:
-   /* The linker don't know how to relax GOT accesses in extreme
-  code model.  */
-   if (TARGET_CMODEL_EXTREME)
- return true;
-
/* If we are performing LTO for a final link, and we have the
   linker plugin so we know the resolution of the symbols, then
   all GOT references are binding to external symbols or
@@ -2776,7 +2775,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
 
   start_sequence ();
 
-  if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
+  if (loongarch_explicit_relocs_p (type))
 {
   /* Split tls symbol to high and low.  */
   rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
@@ -2809,7 +2808,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
case CMODEL_MEDIUM:
{
  rtx reg = gen_reg_rtx (Pmode);
- if (TARGET_EXPLICIT_RELOCS)
+ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
{
  emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol));
  rtx call = gen_call_value_internal_1 (Pmode, v0, reg,
@@ -2845,7 +2844,7 @@ loongarch_call_tls_get_addr (rtx sym, enum 
loongarch_symbol_type type, rtx v0)
case CMODEL_NORMAL:
case CMODEL_MEDIUM:
{
- if (TARGET_EXPLICIT_RELOCS)
+ if (loongarch_explicit_relocs_p (SYMBOL_GOT_DISP))
{
  rtx high = gen_reg_rtx (Pmode);
  loongarch_emit_move (high,
@@ -2937,7 +2936,7 @@ loongarch_legitimize_tls_address (rtx loc)
  tmp1 = gen_reg_rtx (Pmode);
  tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
  dest = gen_reg_rtx (Pmode);
- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
+ if (loongarch_explicit_relocs_p (SYMBOL_TLS_IE))
{
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
@@ -2994,7 +2993,7 @@ loongarch_legitimize_tls_address (rtx loc)
  tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
  dest = gen_reg_rtx (Pmode);
 
- if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
+ if (loongarch_explicit_relocs_p (SYMBOL_TLS_LE))
{
  tmp3 = gen_reg_rtx (Pmode);
  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
new file mode 100644
index 000..27baf4886d6
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto -mcmodel=extreme -fno-plt" } 
*/
+/* { dg-final { scan-assembler-not "la.tls.[lg]d" { target tls_native } } } */
+
+#include "./explicit-relocs-auto-tls-ld-gd.c"
diff --git 
a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c 

[PATCH v4 0/4] When cmodel=extreme, add macro support and only support macros.

2024-01-25 Thread Lulu Cheng
v3 -> v4:
  1. Add macro support for TLS symbols.
  2. Add support for loading the __tls_get_addr symbol address using call36.
  3. Merge template got_load_tls_{ld/gd/le/ie}.
  4. Enable explicit reloc for extreme TLS GD/LD with -mexplicit-relocs=auto.


v2 -> v3:
  1. Modify the detection rules of a test case.

v1 -> v2:
  1. Use the temporarily allocated registers as intermediate registers to 
implement the extreme macro.
  2. Fixed bugs in v1 test cases.


Lulu Cheng (4):
  LoongArch: Merge template got_load_tls_{ld/gd/le/ie}.
  LoongArch: Add the macro implementation of mcmodel=extreme.
  LoongArch: Enable explicit reloc for extreme TLS GD/LD with
-mexplicit-relocs=auto.
  LoongArch: Added support for loading __get_tls_addr symbol address
using call36.

 gcc/config/loongarch/loongarch-protos.h   |   1 +
 gcc/config/loongarch/loongarch.cc | 182 +-
 gcc/config/loongarch/loongarch.md | 101 ++
 gcc/config/loongarch/predicates.md|  12 ++
 .../gcc.target/loongarch/attr-model-5.c   |   8 +
 .../explicit-relocs-extreme-auto-tls-ld-gd.c  |   5 +
 .../explicit-relocs-medium-auto-tls-ld-gd.c   |   5 +
 ...icit-relocs-medium-call36-auto-tls-ld-gd.c |   5 +
 .../loongarch/func-call-extreme-5.c   |   7 +
 .../loongarch/func-call-extreme-6.c   |   7 +
 .../gcc.target/loongarch/tls-extreme-macro.c  |  35 
 11 files changed, 239 insertions(+), 129 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-5.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-extreme-auto-tls-ld-gd.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-auto-tls-ld-gd.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/explicit-relocs-medium-call36-auto-tls-ld-gd.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-extreme-macro.c

-- 
2.39.3



[PATCH] LoongArch: Disable TLS type symbols from generating non-zero offsets.

2024-01-22 Thread Lulu Cheng
TLS GD, LD and IE type symbols generate corresponding GOT entries, so
non-zero offsets cannot be generated for them.
Computing the address of a TLS LE type symbol+addend is not yet implemented
in binutils, so non-zero offsets are not generated for it either for the
time being.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_symbolic_constant_p):
For symbols of type tls, non-zero Offset is not generated.
---
 gcc/config/loongarch/loongarch.cc | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 82467474288..f2ce1f6906d 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1924,11 +1924,7 @@ loongarch_symbolic_constant_p (rtx x, enum 
loongarch_symbol_type *symbol_type)
   x = UNSPEC_ADDRESS (x);
 }
   else if (SYMBOL_REF_P (x) || LABEL_REF_P (x))
-{
-  *symbol_type = loongarch_classify_symbol (x);
-  if (*symbol_type == SYMBOL_TLS)
-   return true;
-}
+*symbol_type = loongarch_classify_symbol (x);
   else
 return false;
 
@@ -1939,17 +1935,21 @@ loongarch_symbolic_constant_p (rtx x, enum 
loongarch_symbol_type *symbol_type)
  relocations.  */
   switch (*symbol_type)
 {
-case SYMBOL_TLS_IE:
-case SYMBOL_TLS_LE:
-case SYMBOL_TLSGD:
-case SYMBOL_TLSLDM:
 case SYMBOL_PCREL:
 case SYMBOL_PCREL64:
   /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
   return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
 
+/* The following symbol types do not allow non-zero offsets.  */
 case SYMBOL_GOT_DISP:
+case SYMBOL_TLS_IE:
+case SYMBOL_TLSGD:
+case SYMBOL_TLSLDM:
 case SYMBOL_TLS:
+/* From an implementation perspective, tls_le symbols are allowed to
+   have non-zero offsets, but currently binutils has not added support,
+   so the generation of non-zero offsets is prohibited here.  */
+case SYMBOL_TLS_LE:
   return false;
 }
   gcc_unreachable ();
-- 
2.39.3



[PATCH] LoongArch: Assign the '/u' attribute to the mem to which the global offset table belongs.

2024-01-12 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_split_symbol):
Assign the '/u' attribute to the mem.

gcc/testsuite/ChangeLog:

* g++.target/loongarch/got-load.C: New test.
---
 gcc/config/loongarch/loongarch.cc |  5 +
 gcc/testsuite/g++.target/loongarch/got-load.C | 19 +++
 2 files changed, 24 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/loongarch/got-load.C

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 3b8559bfdc8..82467474288 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3202,6 +3202,11 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode 
mode, rtx *low_out)
  rtx mem = gen_rtx_MEM (Pmode, low);
  *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
 UNSPEC_LOAD_FROM_GOT);
+
+ /* Nonzero in a mem, if the memory is statically allocated and
+read-only.  A common example of the later is a shared library’s
+global offset table.  */
+ MEM_READONLY_P (mem) = 1;
}
 
  break;
diff --git a/gcc/testsuite/g++.target/loongarch/got-load.C 
b/gcc/testsuite/g++.target/loongarch/got-load.C
new file mode 100644
index 000..20924c73942
--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/got-load.C
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2 -mexplicit-relocs -mcmodel=normal 
-fdump-rtl-expand" } */
+/* { dg-final { scan-rtl-dump-times "mem/u" 2 "expand" } } */
+
+#include 
+
+using namespace std;
+
+int lr[15][2];
+
+void
+test(void)
+{
+  int n;
+
+  cin >> n;
+  for (int i = 0; i < n; ++i)
+cin >> lr[i][0] >> lr[i][1];
+}
-- 
2.39.3



[PATCH 3/3] LoongArch: Redundant sign extension elimination optimization 2.

2024-01-06 Thread Lulu Cheng
From: liwei 

Eliminate the redundant sign extension that exists after the conditional
move when the target register is SImode.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
Adjust.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/sign-extend-2.c: Adjust.
---
 gcc/config/loongarch/loongarch.cc  | 2 ++
 gcc/testsuite/gcc.target/loongarch/sign-extend-2.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index ec376a7228a..4b757b30b64 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -5371,6 +5371,8 @@ loongarch_expand_conditional_move (rtx *operands)
  rtx temp3 = gen_reg_rtx (mode);
  emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
  temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
+ SUBREG_PROMOTED_VAR_P (temp3) = 1;
+ SUBREG_PROMOTED_SET (temp3, SRP_SIGNED);
  loongarch_emit_move (operands[0], temp3);
}
   else
diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c 
b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
index a45dde4f73f..428535cb8e3 100644
--- a/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
+++ b/gcc/testsuite/gcc.target/loongarch/sign-extend-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mabi=lp64d -O2" } */
-/* { dg-final { scan-assembler-times "slli.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,0" 1 
} } */
+/* { dg-final { scan-assembler-times "slli.w\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,0" 0 
} } */
 
 #include 
 #define my_min(x, y) ((x) < (y) ? (x) : (y))
-- 
2.39.3



[PATCH 1/3] LoongArch: Optimized some of the symbolic expansion instructions generated during bitwise operations.

2024-01-06 Thread Lulu Cheng
There are two mode iterators defined in the loongarch.md:
(define_mode_iterator GPR [SI (DI "TARGET_64BIT")])
  and
(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")])
Replace the mode in the bit arithmetic from GPR to X.

Since the bitwise operation instructions do not distinguish between 64-bit
and 32-bit operands, a sign extension is necessary if the bitwise
operation is narrower than 64 bits.
The original definition generated a lot of redundant sign extension
instructions. This problem is optimized with reference to the
implementation of RISC-V.

With this patch, SPEC2017 500.perlbench performance improves by 1.8%.

gcc/ChangeLog:

* config/loongarch/loongarch.md (one_cmpl2): Replace GPR with X.
(*nor3): Likewise.
(nor3): Likewise.
(*negsi2_extended): New template.
(*si3_internal): Likewise.
(*one_cmplsi2_internal): Likewise.
(*norsi3_internal): Likewise.
(*nsi_internal): Likewise.
(bytepick_w__extend): Modify this template according to the
modified bit operation to make the optimization work.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/sign-extend-bitwise.c: New test.
---
 gcc/config/loongarch/loongarch.md | 93 ++-
 .../loongarch/sign-extend-bitwise.c   | 21 +
 2 files changed, 90 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-bitwise.c

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index d1f5b94f5d6..436b9a93235 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -736,7 +736,7 @@ (define_insn "sub3"
 
 (define_insn "sub3"
   [(set (match_operand:GPR 0 "register_operand" "=r")
-   (minus:GPR (match_operand:GPR 1 "register_operand" "rJ")
+   (minus:GPR (match_operand:GPR 1 "register_operand" "r")
   (match_operand:GPR 2 "register_operand" "r")))]
   ""
   "sub.\t%0,%z1,%2"
@@ -1412,13 +1412,13 @@ (define_insn "neg2"
   [(set_attr "alu_type""sub")
(set_attr "mode" "")])
 
-(define_insn "one_cmpl2"
-  [(set (match_operand:GPR 0 "register_operand" "=r")
-   (not:GPR (match_operand:GPR 1 "register_operand" "r")))]
-  ""
-  "nor\t%0,%.,%1"
-  [(set_attr "alu_type" "not")
-   (set_attr "mode" "")])
+(define_insn "*negsi2_extended"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (sign_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r"]
+  "TARGET_64BIT"
+  "sub.w\t%0,%.,%1"
+  [(set_attr "alu_type" "sub")
+   (set_attr "mode" "SI")])
 
 (define_insn "neg2"
   [(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -1438,14 +1438,39 @@ (define_insn "neg2"
 ;;
 
 (define_insn "3"
-  [(set (match_operand:GPR 0 "register_operand" "=r,r")
-   (any_bitwise:GPR (match_operand:GPR 1 "register_operand" "%r,r")
-(match_operand:GPR 2 "uns_arith_operand" "r,K")))]
+  [(set (match_operand:X 0 "register_operand" "=r,r")
+   (any_bitwise:X (match_operand:X 1 "register_operand" "%r,r")
+  (match_operand:X 2 "uns_arith_operand" "r,K")))]
   ""
   "%i2\t%0,%1,%2"
   [(set_attr "type" "logical")
(set_attr "mode" "")])
 
+(define_insn "*si3_internal"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+   (any_bitwise:SI (match_operand:SI 1 "register_operand" "%r,r")
+   (match_operand:SI 2 "uns_arith_operand"" r,K")))]
+  "TARGET_64BIT"
+  "%i2\t%0,%1,%2"
+  [(set_attr "type" "logical")
+   (set_attr "mode" "SI")])
+
+(define_insn "one_cmpl2"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (not:X (match_operand:X 1 "register_operand" "r")))]
+  ""
+  "nor\t%0,%.,%1"
+  [(set_attr "alu_type" "not")
+   (set_attr "mode" "")])
+
+(define_insn "*one_cmplsi2_internal"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+   (not:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_64BIT"
+  "nor\t%0,%.,%1"
+  [(set_attr "type" "logical")
+   (set_attr "mode" "SI")])
+
 (define_insn "and3_extended"
   [(set (match_operand:GPR 0 "register_operand" "=r")
(and:GPR (match_operand:GPR 1 "nonimmediate_operand" "r")
@@ -1561,25 +1586,43 @@ (define_insn "*iorhi3"
   [(set_attr "type" "logical")
(set_attr "mode" "HI")])
 
-(define_insn "*nor3"
-  [(set (match_operand:GPR 0 "register_operand" "=r")
-   (and:GPR (not:GPR (match_operand:GPR 1 "register_operand" "%r"))
-(not:GPR (match_operand:GPR 2 "register_operand" "r"]
+(define_insn "nor3"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (and:X (not:X (match_operand:X 1 "register_operand" "%r"))
+(not:X (match_operand:X 2 "register_operand" "r"]
   ""
   "nor\t%0,%1,%2"
   [(set_attr "type" "logical")
(set_attr "mode" "")])
 
+(define_insn "*norsi3_internal"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+   (and:SI 

[PATCH 2/3] LoongArch: Redundant sign extension elimination optimization.

2024-01-06 Thread Lulu Cheng
From: liwei 

We found that the current combine optimization pass in gcc cannot handle
the following redundant sign extension situations:

(insn 77 76 78 5 (set (reg:SI 143)
(plus:SI (subreg/s/u:SI (reg/v:DI 104 [ len ]) 0)
(const_int 1 [0x1]))) {addsi3}
(expr_list:REG_DEAD (reg/v:DI 104 [ len ])
(nil)))
(insn 78 77 82 5 (set (reg/v:DI 104 [ len ])
(sign_extend:DI (reg:SI 143))) {extendsidi2}
(nil))

Because reg:SI 143 neither dies nor is set in insn 78, combine does not
substitute and merge this insn sequence. We adjusted the add template to
eliminate redundant sign extensions during the expand pass.

gcc/ChangeLog:

* config/loongarch/loongarch.md (add3): Removed.
(*addsi3): New.
(addsi3): New.
(adddi3): New.
(*addsi3_extended): Removed.
(addsi3_extended): New.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/sign-extend.c: Moved to...
* gcc.target/loongarch/sign-extend-1.c: ...here.
* gcc.target/loongarch/sign-extend-2.c: New test.
---
 gcc/config/loongarch/loongarch.md | 93 ---
 .../{sign-extend.c => sign-extend-1.c}|  0
 .../gcc.target/loongarch/sign-extend-2.c  | 59 
 3 files changed, 137 insertions(+), 15 deletions(-)
 rename gcc/testsuite/gcc.target/loongarch/{sign-extend.c => sign-extend-1.c} 
(100%)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend-2.c

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 436b9a93235..17ec401f535 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -657,15 +657,15 @@ (define_insn "add3"
   [(set_attr "type" "fadd")
(set_attr "mode" "")])
 
-(define_insn_and_split "add3"
-  [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r")
-   (plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r")
- (match_operand:GPR 2 "plus__operand"
+(define_insn_and_split "*addsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r")
+   (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r,r,r")
+ (match_operand:SI 2 "plus_si_operand"
   "r,I,La,Lb,Lc,Ld,Le")))]
   ""
   "@
-   add.\t%0,%1,%2
-   addi.\t%0,%1,%2
+   add.w\t%0,%1,%2
+   addi.w\t%0,%1,%2
#
* operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
  return \"addu16i.d\t%0,%1,%2\";
@@ -674,25 +674,88 @@ (define_insn_and_split "add3"
#"
   "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
&& !ADDU16I_OPERAND (INTVAL (operands[2]))"
-  [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
-   (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
+  [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))]
   {
-loongarch_split_plus_constant ([2], mode);
+loongarch_split_plus_constant ([2], SImode);
   }
   [(set_attr "alu_type" "add")
-   (set_attr "mode" "")
+   (set_attr "mode" "SI")
(set_attr "insn_count" "1,1,2,1,2,2,2")
(set (attr "enabled")
   (cond
-   [(match_test "mode != DImode && which_alternative == 4")
+   [(match_test "which_alternative == 4")
 (const_string "no")
-(match_test "mode != DImode && which_alternative == 5")
-(const_string "no")
-(match_test "mode != SImode && which_alternative == 6")
+(match_test "which_alternative == 5")
+(const_string "no")]
+   (const_string "yes")))])
+
+(define_expand "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r")
+   (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r,r")
+(match_operand:SI 2 "plus_si_operand"  "r,I,La,Le,Lb")))]
+  ""
+{
+  if (TARGET_64BIT)
+{
+  if (CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])
+ && ADDU16I_OPERAND (INTVAL (operands[2])))
+   {
+ rtx t1 = gen_reg_rtx (DImode);
+ rtx t2 = gen_reg_rtx (DImode);
+ rtx t3 = gen_reg_rtx (DImode);
+ emit_insn (gen_extend_insn (t1, operands[1], DImode, SImode, 0));
+ t2 = operands[2];
+ emit_insn (gen_adddi3 (t3, t1, t2));
+ t3 = gen_lowpart (SImode, t3);
+ emit_move_insn (operands[0], t3);
+ DONE;
+   }
+  else
+   {
+ rtx t = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
+ t = gen_lowpart (SImode, t);
+ SUBREG_PROMOTED_VAR_P (t) = 1;
+ SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+ emit_move_insn (operands[0], t);
+ DONE;
+   }
+}
+})
+
+(define_insn_and_split "adddi3"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r,r,r")
+   (plus:DI (match_operand:DI 1 "register_operand" "r,r,r,r,r,r,r")
+ (match_operand:DI 2 "plus_di_operand"
+  

[PATCH v3 1/2] LoongArch: Add the macro implementation of mcmodel=extreme.

2024-01-04 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_symbolic_constant_p):
Remove the sym+addend form from the SYMBOL_PCREL64 type symbol.
(loongarch_output_mi_thunk): Add code model extreme support.
(loongarch_option_override_internal): Support option combinations
of -mcmodel=extreme and -mexplicit-relocs=none.
(loongarch_handle_model_attribute): Remove detection code.
* config/loongarch/loongarch.md (movdi_pcrel64): New template.
(movdi_got_disp): Likewise.
* config/loongarch/predicates.md (symbolic_got_operand): Determine
whether the symbol type is SYMBOL_GOT_DISP.
(symbolic_pcrel64_operand): Determine whether the symbol type is
SYMBOL_PCREL64.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/func-call-extreme-5.c: New test.
* gcc.target/loongarch/func-call-extreme-6.c: New test.
---
 gcc/config/loongarch/loongarch.cc | 28 +--
 gcc/config/loongarch/loongarch.md | 50 +++
 gcc/config/loongarch/predicates.md| 14 ++
 .../loongarch/func-call-extreme-5.c   |  7 +++
 .../loongarch/func-call-extreme-6.c   |  7 +++
 5 files changed, 91 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 28d64135c54..6a3321327ea 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1944,10 +1944,10 @@ loongarch_symbolic_constant_p (rtx x, enum 
loongarch_symbol_type *symbol_type)
 case SYMBOL_TLSGD:
 case SYMBOL_TLSLDM:
 case SYMBOL_PCREL:
-case SYMBOL_PCREL64:
   /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
   return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
 
+case SYMBOL_PCREL64:
 case SYMBOL_GOT_DISP:
 case SYMBOL_TLS:
   return false;
@@ -7450,12 +7450,22 @@ loongarch_output_mi_thunk (FILE *file, tree 
thunk_fndecl ATTRIBUTE_UNUSED,
  allowed, otherwise load the address into a register first.  */
   if (use_sibcall_p)
 {
-  insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx));
+  if (TARGET_CMODEL_EXTREME)
+   {
+ emit_insn (gen_movdi_pcrel64 (temp1, fnaddr, temp2));
+ insn = emit_call_insn (gen_sibcall_internal (temp1, const0_rtx));
+   }
+  else
+   insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx));
   SIBLING_CALL_P (insn) = 1;
 }
   else
 {
-  loongarch_emit_move (temp1, fnaddr);
+  if (TARGET_CMODEL_EXTREME)
+   emit_insn (gen_movdi_pcrel64 (temp1, fnaddr, temp2));
+  else
+   loongarch_emit_move (temp1, fnaddr);
+
   emit_jump_insn (gen_indirect_jump (temp1));
 }
 
@@ -7583,10 +7593,6 @@ loongarch_option_override_internal (struct gcc_options 
*opts,
   switch (la_target.cmodel)
 {
   case CMODEL_EXTREME:
-   if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)
- error ("code model %qs is not compatible with %s",
-"extreme", "-mexplicit-relocs=none");
-
if (opts->x_flag_plt)
  {
if (global_options_set.x_flag_plt)
@@ -7951,14 +7957,6 @@ loongarch_handle_model_attribute (tree *node, tree name, 
tree arg, int,
  *no_add_attrs = true;
  return NULL_TREE;
}
-  if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)
-   {
- error_at (DECL_SOURCE_LOCATION (decl),
-   "%qE attribute is not compatible with %s", name,
-   "-mexplicit-relocs=none");
- *no_add_attrs = true;
- return NULL_TREE;
-   }
 
   arg = TREE_VALUE (arg);
   if (TREE_CODE (arg) != STRING_CST)
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index d1f5b94f5d6..c0365dc9e99 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -85,6 +85,9 @@ (define_c_enum "unspec" [
 
   UNSPEC_SIBCALL_VALUE_MULTIPLE_INTERNAL_1
   UNSPEC_CALL_VALUE_MULTIPLE_INTERNAL_1
+
+  UNSPEC_MOV_PCREL64
+  UNSPEC_MOV_GOT_DISP
 ])
 
 (define_c_enum "unspecv" [
@@ -2057,6 +2060,25 @@ (define_expand "movdi"
 {
   if (loongarch_legitimize_move (DImode, operands[0], operands[1]))
 DONE;
+
+  enum loongarch_symbol_type symbol_type;
+  if (loongarch_symbolic_constant_p (operands[1], _type))
+{
+  if (symbol_type == SYMBOL_PCREL64)
+   {
+ gcc_assert (can_create_pseudo_p ());
+ emit_insn (gen_movdi_pcrel64 (operands[0], operands[1],
+   gen_reg_rtx (DImode)));
+ DONE;
+   }
+  else if (TARGET_CMODEL_EXTREME && symbol_type == SYMBOL_GOT_DISP)
+   {
+ gcc_assert (can_create_pseudo_p ());
+ emit_insn (gen_movdi_got_disp (operands[0], 

[PATCH v3 0/2] When cmodel=extreme, add macro support and only support macros.

2024-01-04 Thread Lulu Cheng
When cmodel=extreme, the symbol address is obtained through four
instructions, and errors may occur in some cases during linking if those
instructions are not kept together. Therefore, in order to ensure that the
instructions for obtaining the symbol address stay together, macro
instructions are used to obtain the symbol address when cmodel=extreme.

https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc#extreme-code-model

v1 -> v2:
  1. Use the temporarily allocated registers as intermediate registers to 
implement the extreme macro.
  2. Fixed bugs in v1 test cases.

v2 -> v3:
  1. Modify the detection rules of a test case.

Lulu Cheng (2):
  LoongArch: Add the macro implementation of mcmodel=extreme.
  LoongArch: When the code model is extreme, the symbol address is
obtained through macro instructions regardless of the value of
-mexplicit-relocs.

 gcc/config/loongarch/loongarch.cc | 39 +--
 gcc/config/loongarch/loongarch.md | 50 +++
 gcc/config/loongarch/predicates.md| 14 ++
 .../gcc.target/loongarch/attr-model-1.c   |  2 +-
 .../gcc.target/loongarch/attr-model-2.c   |  2 +-
 .../gcc.target/loongarch/attr-model-3.c   |  2 +-
 .../gcc.target/loongarch/attr-model-4.c   |  2 +-
 .../loongarch/func-call-extreme-1.c   |  6 +--
 .../loongarch/func-call-extreme-2.c   |  6 +--
 .../loongarch/func-call-extreme-3.c   |  6 +--
 .../loongarch/func-call-extreme-4.c   |  6 +--
 .../loongarch/func-call-extreme-5.c   |  7 +++
 .../loongarch/func-call-extreme-6.c   |  7 +++
 13 files changed, 118 insertions(+), 31 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c

-- 
2.39.3



[PATCH v3 2/2] LoongArch: When the code model is extreme, the symbol address is obtained through macro instructions regardless of the value of -mexplicit-relocs.

2024-01-04 Thread Lulu Cheng
Instructions pcalau12i, addi.d, lu32i.d and lu52i.d must be adjacent so that
the linker can infer the PC of pcalau12i to apply relocations to lu32i.d and
lu52i.d. Otherwise, the results would be incorrect if these four instructions
are not in the same 4KiB page.

See the link for details:
https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc#extreme-code-model.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_symbol_extreme_p): Add
function declaration.
(loongarch_explicit_relocs_p): Use the macro instruction to get
the symbol address when loongarch_symbol_extreme_p returns true.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/attr-model-1.c: Modify the content of the search
string in the test case.
* gcc.target/loongarch/attr-model-2.c: Likewise.
* gcc.target/loongarch/attr-model-3.c: Likewise.
* gcc.target/loongarch/attr-model-4.c: Likewise.
* gcc.target/loongarch/func-call-extreme-1.c: Likewise.
* gcc.target/loongarch/func-call-extreme-2.c: Likewise.
* gcc.target/loongarch/func-call-extreme-3.c: Likewise.
* gcc.target/loongarch/func-call-extreme-4.c: Likewise.
---
 gcc/config/loongarch/loongarch.cc | 11 +++
 gcc/testsuite/gcc.target/loongarch/attr-model-1.c |  2 +-
 gcc/testsuite/gcc.target/loongarch/attr-model-2.c |  2 +-
 gcc/testsuite/gcc.target/loongarch/attr-model-3.c |  2 +-
 gcc/testsuite/gcc.target/loongarch/attr-model-4.c |  2 +-
 .../gcc.target/loongarch/func-call-extreme-1.c|  6 +++---
 .../gcc.target/loongarch/func-call-extreme-2.c|  6 +++---
 .../gcc.target/loongarch/func-call-extreme-3.c|  6 +++---
 .../gcc.target/loongarch/func-call-extreme-4.c|  6 +++---
 9 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 6a3321327ea..3b4b28f3bcc 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -264,6 +264,9 @@ const char *const
 loongarch_fp_conditions[16]= {LARCH_FP_CONDITIONS (STRINGIFY)};
 #undef STRINGIFY
 
+static bool
+loongarch_symbol_extreme_p (enum loongarch_symbol_type type);
+
 /* Size of guard page.  */
 #define STACK_CLASH_PROTECTION_GUARD_SIZE \
   (1 << param_stack_clash_protection_guard_size)
@@ -1963,6 +1966,14 @@ loongarch_symbolic_constant_p (rtx x, enum 
loongarch_symbol_type *symbol_type)
 bool
 loongarch_explicit_relocs_p (enum loongarch_symbol_type type)
 {
+  /* Instructions pcalau12i, addi.d, lu32i.d and lu52i.d must be adjancent
+ so that the linker can infer the PC of pcalau12i to apply relocations
+ to lu32i.d and lu52i.d.  Otherwise, the results would be incorrect if
+ these four instructions are not in the same 4KiB page.
+ Therefore, macro instructions are used when cmodel=extreme.  */
+  if (loongarch_symbol_extreme_p (type))
+return false;
+
   if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
 return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;
 
diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-1.c 
b/gcc/testsuite/gcc.target/loongarch/attr-model-1.c
index 916d715b98b..199849147de 100644
--- a/gcc/testsuite/gcc.target/loongarch/attr-model-1.c
+++ b/gcc/testsuite/gcc.target/loongarch/attr-model-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mexplicit-relocs -mcmodel=normal -O2" } */
-/* { dg-final { scan-assembler-times "%pc64_hi12" 2 } } */
+/* { dg-final { scan-assembler-times "la\.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+," 
2 } } */
 
 #define ATTR_MODEL_TEST
 #include "attr-model-test.c"
diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-2.c 
b/gcc/testsuite/gcc.target/loongarch/attr-model-2.c
index a74c795ac3e..72dcc89930f 100644
--- a/gcc/testsuite/gcc.target/loongarch/attr-model-2.c
+++ b/gcc/testsuite/gcc.target/loongarch/attr-model-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mexplicit-relocs -mcmodel=extreme -O2" } */
-/* { dg-final { scan-assembler-times "%pc64_hi12" 3 } } */
+/* { dg-final { scan-assembler-times "la\.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+," 
3 } } */
 
 #define ATTR_MODEL_TEST
 #include "attr-model-test.c"
diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-3.c 
b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c
index 5622d508678..cfb299986cc 100644
--- a/gcc/testsuite/gcc.target/loongarch/attr-model-3.c
+++ b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mexplicit-relocs=auto -mcmodel=normal -O2" } */
-/* { dg-final { scan-assembler-times "%pc64_hi12" 2 } } */
+/* { dg-final { scan-assembler-times "la\.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+," 
2 } } */
 
 #define ATTR_MODEL_TEST
 #include "attr-model-test.c"
diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-4.c 
b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c
index 482724bb974..a3ab22794d3 100644
--- 

[PATCH v2 1/2] LoongArch: Add the macro implementation of mcmodel=extreme.

2024-01-04 Thread Lulu Cheng
gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_symbolic_constant_p):
Remove the sym+addend form from the SYMBOL_PCREL64 type symbol.
(loongarch_output_mi_thunk): Add code model extreme support.
(loongarch_option_override_internal): Support option combinations
of -mcmodel=extreme and -mexplicit-relocs=none.
(loongarch_handle_model_attribute): Remove detection code.
* config/loongarch/loongarch.md (movdi_pcrel64): New template.
(movdi_got_disp): Likewise.
* config/loongarch/predicates.md (symbolic_got_operand): Determine
whether the symbol type is SYMBOL_GOT_DISP.
(symbolic_pcrel64_operand): Determine whether the symbol type is
SYMBOL_PCREL64.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/func-call-extreme-5.c: New test.
* gcc.target/loongarch/func-call-extreme-6.c: New test.
---
 gcc/config/loongarch/loongarch.cc | 28 +--
 gcc/config/loongarch/loongarch.md | 50 +++
 gcc/config/loongarch/predicates.md| 14 ++
 .../loongarch/func-call-extreme-5.c   |  7 +++
 .../loongarch/func-call-extreme-6.c   |  7 +++
 5 files changed, 91 insertions(+), 15 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 28d64135c54..6a3321327ea 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1944,10 +1944,10 @@ loongarch_symbolic_constant_p (rtx x, enum 
loongarch_symbol_type *symbol_type)
 case SYMBOL_TLSGD:
 case SYMBOL_TLSLDM:
 case SYMBOL_PCREL:
-case SYMBOL_PCREL64:
   /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
   return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
 
+case SYMBOL_PCREL64:
 case SYMBOL_GOT_DISP:
 case SYMBOL_TLS:
   return false;
@@ -7450,12 +7450,22 @@ loongarch_output_mi_thunk (FILE *file, tree 
thunk_fndecl ATTRIBUTE_UNUSED,
  allowed, otherwise load the address into a register first.  */
   if (use_sibcall_p)
 {
-  insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx));
+  if (TARGET_CMODEL_EXTREME)
+   {
+ emit_insn (gen_movdi_pcrel64 (temp1, fnaddr, temp2));
+ insn = emit_call_insn (gen_sibcall_internal (temp1, const0_rtx));
+   }
+  else
+   insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx));
   SIBLING_CALL_P (insn) = 1;
 }
   else
 {
-  loongarch_emit_move (temp1, fnaddr);
+  if (TARGET_CMODEL_EXTREME)
+   emit_insn (gen_movdi_pcrel64 (temp1, fnaddr, temp2));
+  else
+   loongarch_emit_move (temp1, fnaddr);
+
   emit_jump_insn (gen_indirect_jump (temp1));
 }
 
@@ -7583,10 +7593,6 @@ loongarch_option_override_internal (struct gcc_options 
*opts,
   switch (la_target.cmodel)
 {
   case CMODEL_EXTREME:
-   if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)
- error ("code model %qs is not compatible with %s",
-"extreme", "-mexplicit-relocs=none");
-
if (opts->x_flag_plt)
  {
if (global_options_set.x_flag_plt)
@@ -7951,14 +7957,6 @@ loongarch_handle_model_attribute (tree *node, tree name, 
tree arg, int,
  *no_add_attrs = true;
  return NULL_TREE;
}
-  if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)
-   {
- error_at (DECL_SOURCE_LOCATION (decl),
-   "%qE attribute is not compatible with %s", name,
-   "-mexplicit-relocs=none");
- *no_add_attrs = true;
- return NULL_TREE;
-   }
 
   arg = TREE_VALUE (arg);
   if (TREE_CODE (arg) != STRING_CST)
diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index d1f5b94f5d6..c0365dc9e99 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -85,6 +85,9 @@ (define_c_enum "unspec" [
 
   UNSPEC_SIBCALL_VALUE_MULTIPLE_INTERNAL_1
   UNSPEC_CALL_VALUE_MULTIPLE_INTERNAL_1
+
+  UNSPEC_MOV_PCREL64
+  UNSPEC_MOV_GOT_DISP
 ])
 
 (define_c_enum "unspecv" [
@@ -2057,6 +2060,25 @@ (define_expand "movdi"
 {
   if (loongarch_legitimize_move (DImode, operands[0], operands[1]))
 DONE;
+
+  enum loongarch_symbol_type symbol_type;
+  if (loongarch_symbolic_constant_p (operands[1], _type))
+{
+  if (symbol_type == SYMBOL_PCREL64)
+   {
+ gcc_assert (can_create_pseudo_p ());
+ emit_insn (gen_movdi_pcrel64 (operands[0], operands[1],
+   gen_reg_rtx (DImode)));
+ DONE;
+   }
+  else if (TARGET_CMODEL_EXTREME && symbol_type == SYMBOL_GOT_DISP)
+   {
+ gcc_assert (can_create_pseudo_p ());
+ emit_insn (gen_movdi_got_disp (operands[0], 

[PATCH v2 2/2] LoongArch: When the code model is extreme, the symbol address is obtained through macro instructions regardless of the value of -mexplicit-relocs.

2024-01-04 Thread Lulu Cheng
Instructions pcalau12i, addi.d, lu32i.d and lu52i.d must be adjacent so that
the linker can infer the PC of pcalau12i to apply relocations to lu32i.d and
lu52i.d. Otherwise, the results would be incorrect if these four instructions
are not in the same 4KiB page.

See the link for details:
https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc#extreme-code-model.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_symbol_extreme_p): Add
function declaration.
(loongarch_explicit_relocs_p): Use the macro instruction to get
the symbol address when loongarch_symbol_extreme_p returns true.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/attr-model-1.c: Modify the content of the search
string in the test case.
* gcc.target/loongarch/attr-model-2.c: Likewise.
* gcc.target/loongarch/attr-model-3.c: Likewise.
* gcc.target/loongarch/attr-model-4.c: Likewise.
* gcc.target/loongarch/func-call-extreme-1.c: Likewise.
* gcc.target/loongarch/func-call-extreme-2.c: Likewise.
* gcc.target/loongarch/func-call-extreme-3.c: Likewise.
* gcc.target/loongarch/func-call-extreme-4.c: Likewise.
---
 gcc/config/loongarch/loongarch.cc | 11 +++
 gcc/testsuite/gcc.target/loongarch/attr-model-1.c |  2 +-
 gcc/testsuite/gcc.target/loongarch/attr-model-2.c |  2 +-
 gcc/testsuite/gcc.target/loongarch/attr-model-3.c |  2 +-
 gcc/testsuite/gcc.target/loongarch/attr-model-4.c |  2 +-
 .../gcc.target/loongarch/func-call-extreme-1.c|  6 +++---
 .../gcc.target/loongarch/func-call-extreme-2.c|  6 +++---
 .../gcc.target/loongarch/func-call-extreme-3.c|  6 +++---
 .../gcc.target/loongarch/func-call-extreme-4.c|  6 +++---
 9 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index 6a3321327ea..3b4b28f3bcc 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -264,6 +264,9 @@ const char *const
 loongarch_fp_conditions[16]= {LARCH_FP_CONDITIONS (STRINGIFY)};
 #undef STRINGIFY
 
+static bool
+loongarch_symbol_extreme_p (enum loongarch_symbol_type type);
+
 /* Size of guard page.  */
 #define STACK_CLASH_PROTECTION_GUARD_SIZE \
   (1 << param_stack_clash_protection_guard_size)
@@ -1963,6 +1966,14 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type)
 bool
 loongarch_explicit_relocs_p (enum loongarch_symbol_type type)
 {
+  /* Instructions pcalau12i, addi.d, lu32i.d and lu52i.d must be adjancent
+ so that the linker can infer the PC of pcalau12i to apply relocations
+ to lu32i.d and lu52i.d.  Otherwise, the results would be incorrect if
+ these four instructions are not in the same 4KiB page.
+ Therefore, macro instructions are used when cmodel=extreme.  */
+  if (loongarch_symbol_extreme_p (type))
+return false;
+
   if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
 return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;
 
diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-1.c b/gcc/testsuite/gcc.target/loongarch/attr-model-1.c
index 916d715b98b..65acb29162c 100644
--- a/gcc/testsuite/gcc.target/loongarch/attr-model-1.c
+++ b/gcc/testsuite/gcc.target/loongarch/attr-model-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mexplicit-relocs -mcmodel=normal -O2" } */
-/* { dg-final { scan-assembler-times "%pc64_hi12" 2 } } */
+/* { dg-final { scan-assembler-times "la\.local\t\\\$r\[0-9\]+,\\\$r15," 2 } } */
 
 #define ATTR_MODEL_TEST
 #include "attr-model-test.c"
diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-2.c b/gcc/testsuite/gcc.target/loongarch/attr-model-2.c
index a74c795ac3e..cf0f079e39a 100644
--- a/gcc/testsuite/gcc.target/loongarch/attr-model-2.c
+++ b/gcc/testsuite/gcc.target/loongarch/attr-model-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mexplicit-relocs -mcmodel=extreme -O2" } */
-/* { dg-final { scan-assembler-times "%pc64_hi12" 3 } } */
+/* { dg-final { scan-assembler-times "la\.local\t\\\$r\[0-9\]+,\\\$r15," 3 } } */
 
 #define ATTR_MODEL_TEST
 #include "attr-model-test.c"
diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-3.c b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c
index 5622d508678..7c270d462f7 100644
--- a/gcc/testsuite/gcc.target/loongarch/attr-model-3.c
+++ b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mexplicit-relocs=auto -mcmodel=normal -O2" } */
-/* { dg-final { scan-assembler-times "%pc64_hi12" 2 } } */
+/* { dg-final { scan-assembler-times "la\.local\t\\\$r\[0-9\]+,\\\$r15," 2 } } */
 
 #define ATTR_MODEL_TEST
 #include "attr-model-test.c"
diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-4.c b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c
index 482724bb974..627d630c36d 100644
--- 

[PATCH v2 0/2] When cmodel=extreme, add macro support and only support macros.

2024-01-04 Thread Lulu Cheng
When cmodel=extreme, the symbol address is obtained through four instructions,
and errors may occur in some cases during linking if those instructions are
not kept together.  Therefore, to ensure that the instructions for obtaining
the symbol address stay together, macro instructions are used to obtain the
symbol address when cmodel=extreme.

https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc#extreme-code-model

v1 -> v2:
  1. Use the temporarily allocated registers as intermediate registers to
     implement the extreme macro.
  2. Fixed bugs in v1 test cases.

Lulu Cheng (2):
  LoongArch: Add the macro implementation of mcmodel=extreme.
  LoongArch: When the code model is extreme, the symbol address is
obtained through macro instructions regardless of the value of
-mexplicit-relocs.

 gcc/config/loongarch/loongarch.cc | 39 +--
 gcc/config/loongarch/loongarch.md | 50 +++
 gcc/config/loongarch/predicates.md| 14 ++
 .../gcc.target/loongarch/attr-model-1.c   |  2 +-
 .../gcc.target/loongarch/attr-model-2.c   |  2 +-
 .../gcc.target/loongarch/attr-model-3.c   |  2 +-
 .../gcc.target/loongarch/attr-model-4.c   |  2 +-
 .../loongarch/func-call-extreme-1.c   |  6 +--
 .../loongarch/func-call-extreme-2.c   |  6 +--
 .../loongarch/func-call-extreme-3.c   |  6 +--
 .../loongarch/func-call-extreme-4.c   |  6 +--
 .../loongarch/func-call-extreme-5.c   |  7 +++
 .../loongarch/func-call-extreme-6.c   |  7 +++
 13 files changed, 118 insertions(+), 31 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-6.c

-- 
2.39.3



[PATCH] LoongArch: Fixed the problem of incorrect judgment of the immediate field of the [x]vld/[x]vst instruction.

2024-01-03 Thread Lulu Cheng
The [x]vld/[x]vst instruction is defined as follows:
  [x]vld/[x]vst {x/v}d, rj, si12

Before this change, the immediate field of [x]vld/[x]vst was treated as being
between 10 and 14 bits wide depending on the type.  Because
loongarch_valid_offset_p restricted the immediate field first, this did not
cause an error, but in some cases redundant instructions were generated (see
the test case).  Now handle the offset according to the description in the
instruction manual.

gcc/ChangeLog:

* config/loongarch/lasx.md (lasx_mxld_):
Modify the method of determining the memory offset of [x]vld/[x]vst.
(lasx_mxst_): Likewise.
* config/loongarch/loongarch.cc (loongarch_valid_offset_p): Delete.
(loongarch_address_insns): Likewise.
* config/loongarch/lsx.md (lsx_ld_): Likewise.
(lsx_st_): Likewise.
* config/loongarch/predicates.md (aq10b_operand): Likewise.
(aq10h_operand): Likewise.
(aq10w_operand): Likewise.
(aq10d_operand): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vect-ld-st-imm12.c: New test.
---
 gcc/config/loongarch/lasx.md  | 26 ---
 gcc/config/loongarch/loongarch.cc | 19 +++---
 gcc/config/loongarch/lsx.md   | 26 ---
 gcc/config/loongarch/predicates.md| 16 
 .../gcc.target/loongarch/vect-ld-st-imm12.c   | 15 +++
 5 files changed, 19 insertions(+), 83 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index dbbf5a136b7..95c6bae20ae 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -846,32 +846,6 @@ (define_split
   DONE;
 })
 
-;; Offset load
-(define_expand "lasx_mxld_"
-  [(match_operand:LASX 0 "register_operand")
-   (match_operand 1 "pmode_register_operand")
-   (match_operand 2 "aq10_operand")]
-  "ISA_HAS_LASX"
-{
-  rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
- INTVAL (operands[2]));
-  loongarch_emit_move (operands[0], gen_rtx_MEM (mode, addr));
-  DONE;
-})
-
-;; Offset store
-(define_expand "lasx_mxst_"
-  [(match_operand:LASX 0 "register_operand")
-   (match_operand 1 "pmode_register_operand")
-   (match_operand 2 "aq10_operand")]
-  "ISA_HAS_LASX"
-{
-  rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
-   INTVAL (operands[2]));
-  loongarch_emit_move (gen_rtx_MEM (mode, addr), operands[0]);
-  DONE;
-})
-
 ;; LASX
 (define_insn "add3"
   [(set (match_operand:ILASX 0 "register_operand" "=f,f,f")
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index db83232884f..b82ef1a7c0a 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2126,21 +2126,11 @@ loongarch_valid_offset_p (rtx x, machine_mode mode)
 
   /* We may need to split multiword moves, so make sure that every word
  is accessible.  */
-  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
+  if (!(LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
+  && GET_MODE_SIZE (mode) > UNITS_PER_WORD
   && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
 return false;
 
-  /* LSX LD.* and ST.* supports 10-bit signed offsets.  */
-  if (LSX_SUPPORTED_MODE_P (mode)
-  && !loongarch_signed_immediate_p (INTVAL (x), 10,
-   loongarch_ldst_scaled_shift (mode)))
-return false;
-
-  /* LASX XVLD.B and XVST.B supports 10-bit signed offsets without shift.  */
-  if (LASX_SUPPORTED_MODE_P (mode)
-  && !loongarch_signed_immediate_p (INTVAL (x), 10, 0))
-return false;
-
   return true;
 }
 
@@ -2376,9 +2366,8 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
   case ADDRESS_REG:
if (lsx_p)
  {
-   /* LSX LD.* and ST.* supports 10-bit signed offsets.  */
-   if (loongarch_signed_immediate_p (INTVAL (addr.offset), 10,
- loongarch_ldst_scaled_shift (mode)))
+   /* LSX LD.* and ST.* supports 12-bit signed offsets.  */
+   if (IMM12_OPERAND (INTVAL (addr.offset)))
  return 1;
else
  return 0;
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index 3e3248ef499..02e89247bdf 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -812,32 +812,6 @@ (define_split
   DONE;
 })
 
-;; Offset load
-(define_expand "lsx_ld_"
-  [(match_operand:LSX 0 "register_operand")
-   (match_operand 1 "pmode_register_operand")
-   (match_operand 2 "aq10_operand")]
-  "ISA_HAS_LSX"
-{
-  rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
-   INTVAL (operands[2]));
-  loongarch_emit_move (operands[0], gen_rtx_MEM (mode, addr));
-  DONE;
-})
-
-;; Offset store
-(define_expand 

  1   2   3   4   >