[gcc r15-982] [APX NF] Support APX NF for lzcnt/tzcnt/popcnt

2024-06-03 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:4371ac5292da9eadc3daccbdce2a15c029a04892

commit r15-982-g4371ac5292da9eadc3daccbdce2a15c029a04892
Author: Lingling Kong 
Date:   Mon Jun 3 14:23:57 2024 +0800

[APX NF] Support APX NF for lzcnt/tzcnt/popcnt

gcc/ChangeLog:

* config/i386/i386.md (clz2_lzcnt_nf): New define_insn.
(*clz2_lzcnt_falsedep_nf): Ditto.
(__nf): Ditto.
(*__falsedep_nf): Ditto.
(_hi): Ditto.
(popcount2_nf): Ditto.
(*popcount2_falsedep_nf): Ditto.
(popcounthi2): Ditto.

Diff:
---
 gcc/config/i386/i386.md | 124 +++-
 1 file changed, 113 insertions(+), 11 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 48ca19cb8df..2c95395b7be 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -20283,6 +20283,24 @@
   operands[3] = gen_reg_rtx (mode);
 })
 
+(define_insn_and_split "clz2_lzcnt_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+   (clz:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+  "TARGET_APX_NF && TARGET_LZCNT"
+  "%{nf%} lzcnt{}\t{%1, %0|%0, %1}"
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])"
+  [(parallel
+[(set (match_dup 0)
+ (clz:SWI48 (match_dup 1)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "ix86_expand_clear (operands[0]);"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "")])
+
 (define_insn_and_split "clz2_lzcnt"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
(clz:SWI48
@@ -20306,6 +20324,18 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+(define_insn "*clz2_lzcnt_falsedep_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+   (clz:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+  UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF && TARGET_LZCNT"
+  "%{nf%} lzcnt{}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "")])
+
 (define_insn "*clz2_lzcnt_falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
(clz:SWI48
@@ -20412,6 +20442,25 @@
 ;; Version of lzcnt/tzcnt that is expanded from intrinsics.  This version
 ;; provides operand size as output when source operand is zero. 
 
+(define_insn_and_split "__nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+   (unspec:SWI48
+ [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+  "TARGET_APX_NF"
+  "%{nf%} {}\t{%1, %0|%0, %1}"
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])"
+  [(parallel
+[(set (match_dup 0)
+ (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+  "ix86_expand_clear (operands[0]);"
+  [(set_attr "type" "")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "")])
+
 (define_insn_and_split "_"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48
@@ -20436,6 +20485,19 @@
 ; False dependency happens when destination is only updated by tzcnt,
 ; lzcnt or popcnt.  There is no false dependency when destination is
 ; also used in source.
+(define_insn "*__falsedep_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+   (unspec:SWI48
+ [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
+   (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+  UNSPEC_INSN_FALSE_DEP)]
+  "TARGET_APX_NF"
+  "%{nf%} {}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "")
+   (set_attr "prefix_0f" "1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "mode" "")])
+
 (define_insn "*__falsedep"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48
@@ -20450,13 +20512,12 @@
(set_attr "prefix_rep" "1")
(set_attr "mode" "")])
 
-(define_insn "_hi"
+(define_insn "_hi"
   [(set (match_operand:HI 0 "register_operand" "=r")
(unspec:HI
- [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "{w}\t{%1, %0|%0, %1}"
+ [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+  ""
+  "{w}\t{%1, %0|%0, %1}"
   [(set_attr "type" "")
(set_attr "prefix_0f" "1")
(set_attr "prefix_rep" "1")
@@ -20874,6 +20935,30 @@
   [(set_attr "type" "bitmanip")
(set_attr "mode" "")])
 
+(define_insn_and_split "popcount2_nf"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+   (popcount:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]

[gcc r15-981] [APX NF] Support APX NF for mul/div

2024-06-03 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:28ac63180e000843ba4a82384407bcc203f27853

commit r15-981-g28ac63180e000843ba4a82384407bcc203f27853
Author: Lingling Kong 
Date:   Mon Jun 3 14:22:07 2024 +0800

[APX NF] Support APX NF for mul/div

gcc/ChangeLog:

* config/i386/i386.md (*mul3_1): New define_insn.
(*mulqi3_1): Ditto.
(*divmod4_noext_nf): Ditto.
(divmodhiqi3): Ditto.

Diff:
---
 gcc/config/i386/i386.md | 47 ++-
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b4233ab99fe..48ca19cb8df 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9900,17 +9900,17 @@
 ;;
 ;; On BDVER1, all HI MULs use DoublePath
 
-(define_insn "*mul3_1"
+(define_insn "*mul3_1"
   [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
(mult:SWIM248
  (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
- (match_operand:SWIM248 2 "" "K,,r")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ (match_operand:SWIM248 2 "" "K,,r")))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))
+   && "
   "@
-   imul{}\t{%2, %1, %0|%0, %1, %2}
-   imul{}\t{%2, %1, %0|%0, %1, %2}
-   imul{}\t{%2, %0|%0, %2}"
+   imul{}\t{%2, %1, %0|%0, %1, %2}
+   imul{}\t{%2, %1, %0|%0, %1, %2}
+   imul{}\t{%2, %0|%0, %2}"
   [(set_attr "type" "imul")
(set_attr "prefix_0f" "0,0,1")
(set (attr "athlon_decode")
@@ -9971,14 +9971,14 @@
 ;; MUL reg8Direct
 ;; MUL mem8Direct
 
-(define_insn "*mulqi3_1"
+(define_insn "*mulqi3_1"
   [(set (match_operand:QI 0 "register_operand" "=a")
(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
-(match_operand:QI 2 "nonimmediate_operand" "qm")))
-   (clobber (reg:CC FLAGS_REG))]
+(match_operand:QI 2 "nonimmediate_operand" "qm")))]
   "TARGET_QIMODE_MATH
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "mul{b}\t%2"
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+   && "
+  "mul{b}\t%2"
   [(set_attr "type" "imul")
(set_attr "length_immediate" "0")
(set (attr "athlon_decode")
@@ -11121,6 +11121,19 @@
   [(set_attr "type" "multi")
(set_attr "mode" "SI")])
 
+(define_insn "*divmod4_noext_nf"
+  [(set (match_operand:SWIM248 0 "register_operand" "=a")
+   (any_div:SWIM248
+ (match_operand:SWIM248 2 "register_operand" "0")
+ (match_operand:SWIM248 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SWIM248 1 "register_operand" "=d")
+   (:SWIM248 (match_dup 2) (match_dup 3)))
+   (use (match_operand:SWIM248 4 "register_operand" "1"))]
+  "TARGET_APX_NF"
+  "%{nf%} div{}\t%3"
+  [(set_attr "type" "idiv")
+   (set_attr "mode" "")])
+
 (define_insn "*divmod4_noext"
   [(set (match_operand:SWIM248 0 "register_operand" "=a")
(any_div:SWIM248
@@ -11268,7 +11281,7 @@
 ;; Change div/mod to HImode and extend the second argument to HImode
 ;; so that mode of div/mod matches with mode of arguments.  Otherwise
 ;; combine may fail.
-(define_insn "divmodhiqi3"
+(define_insn "divmodhiqi3"
   [(set (match_operand:HI 0 "register_operand" "=a")
(ior:HI
  (ashift:HI
@@ -11280,10 +11293,10 @@
(const_int 8))
  (zero_extend:HI
(truncate:QI
- (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_QIMODE_MATH"
-  "div{b}\t%2"
+ (div:HI (match_dup 1) (any_extend:HI (match_dup 2)))]
+  "TARGET_QIMODE_MATH
+   && "
+  "div{b}\t%2"
   [(set_attr "type" "idiv")
(set_attr "mode" "QI")])


[gcc r15-979] [APX NF] Support APX NF for rotate insns

2024-06-03 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:6995d55b7325e03fa1e425f8ad25b7731dea

commit r15-979-g6995d55b7325e03fa1e425f8ad25b7731dea
Author: Lingling Kong 
Date:   Mon Jun 3 11:13:18 2024 +0800

[APX NF] Support APX NF for rotate insns

gcc/ChangeLog:

* config/i386/i386.md (ashr3_cvt): New
define_insn.
(*3_1): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/apx-nf.c: Add test.

Diff:
---
 gcc/config/i386/i386.md| 59 ++
 gcc/testsuite/gcc.target/i386/apx-nf.c |  5 +++
 2 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index f1c828a6dc3..7d3543eee96 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16247,19 +16247,19 @@
 (define_mode_attr cvt_mnemonic
   [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
 
-(define_insn "ashr3_cvt"
+(define_insn "ashr3_cvt"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r")
(ashiftrt:SWI48
  (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm")
- (match_operand:QI 2 "const_int_operand")))
-   (clobber (reg:CC FLAGS_REG))]
+ (match_operand:QI 2 "const_int_operand")))]
   "INTVAL (operands[2]) == GET_MODE_BITSIZE (mode)-1
&& (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ASHIFTRT, mode, operands, TARGET_APX_NDD)"
+   && ix86_binary_operator_ok (ASHIFTRT, mode, operands, TARGET_APX_NDD)
+   && "
   "@

-   sar{}\t{%2, %0|%0, %2}
-   sar{}\t{%2, %1, %0|%0, %1, %2}"
+   sar{}\t{%2, %0|%0, %2}
+   sar{}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "isa" "*,*,apx_ndd")
(set_attr "type" "imovx,ishift,ishift")
(set_attr "prefix_0f" "0,*,*")
@@ -17111,28 +17111,31 @@
   [(set_attr "type" "rotatex")
(set_attr "mode" "")])
 
-(define_insn "*3_1"
+(define_insn "*3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
(any_rotate:SWI48
  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
- (match_operand:QI 2 "nonmemory_operand" "c,,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (, mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "c,,c")))]
+  "ix86_binary_operator_ok (, mode, operands, TARGET_APX_NDD)
+   && "
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
 {
 case TYPE_ROTATEX:
-  return "#";
+  if (TARGET_APX_NDD && )
+   return "%{nf%} {}\t{%2, %1, %0|%0, %1, %2}";
+  else
+   return "#";
 
 default:
   if (operands[2] == const1_rtx
  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !)
return "{}\t%0";
   else
-   return use_ndd ? "{}\t{%2, %1, %0|%0, %1, %2}"
-  : "{}\t{%2, %0|%0, %2}";
+   return use_ndd ? "{}\t{%2, %1, %0|%0, 
%1, %2}"
+  : "{}\t{%2, %0|%0, %2}";
 }
 }
   [(set_attr "isa" "*,bmi2,apx_ndd")
@@ -17166,6 +17169,20 @@
   operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
 })
 
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+   (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+ (match_operand:QI 2 "const_int_operand")))]
+  "TARGET_BMI2 && reload_completed && !optimize_function_for_size_p (cfun)
+   && !TARGET_APX_NDD"
+  [(set (match_dup 0)
+   (rotatert:SWI48 (match_dup 1) (match_dup 2)))]
+{
+  int bitsize = GET_MODE_BITSIZE (mode);
+
+  operands[2] = GEN_INT ((bitsize - INTVAL (operands[2])) % bitsize);
+})
+
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
@@ -17253,22 +17270,22 @@
   [(set (match_dup 0)
(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2])
 
-(define_insn "*3_1"
+(define_insn "*3_1"
   [(set (match_operand:SWI12 0 "nonimmediate_operand" "=m,r")
(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm")
- (match_operand:QI 2 "nonmemory_operand" "c,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (, mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" 
"c,c")))]
+  "ix86_binary_operator_ok (, mode, operands, TARGET_APX_NDD)
+   && "
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   if (operands[2] == const1_rtx
   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-  && !use_ndd)
+  && !use_ndd && !)
 return "{}\t%0";
   else
 return use_ndd
-  ? "{}\t{%2, %1, %0|%0, %1, %2}"
-  : "{}\t{%2, %0|%0, %2}";
+  ? "{}\t{%2, %1, %0|%0, %1, %2}"
+  : "{}\t{%2, %0|%0, %2}";
 }
   [(set_attr "isa" "*,apx_ndd")
(set_attr "type" "rotate")
diff --git a/gcc/testsuite/gcc.target/i386/apx-nf.c 

[gcc r15-980] [APX NF] Support APX NF for shld/shrd

2024-06-03 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:8cd918dd416be0d8f40e2d940e89bbca0f374e7f

commit r15-980-g8cd918dd416be0d8f40e2d940e89bbca0f374e7f
Author: Lingling Kong 
Date:   Mon Jun 3 11:16:12 2024 +0800

[APX NF] Support APX NF for shld/shrd

gcc/ChangeLog:

* config/i386/i386.md (x86_64_shld): New define_insn.
(x86_64_shld): Ditto.
(x86_64_shld_ndd): Ditto.
(x86_64_shld_1): Ditto.
(x86_64_shld_ndd_1): Ditto.
(*x86_64_shld_shrd_1_nozext_nf): Ditto.
(x86_shld): Ditto.
(x86_shld_ndd): Ditto.
(x86_shld_1): Ditto.
(x86_shld_ndd_1): Ditto.
(*x86_shld_shrd_1_nozext_nf): Ditto.
(3_doubleword_lowpart_nf): Ditto.
(x86_64_shrd): Ditto.
(x86_64_shrd_ndd): Ditto.
(x86_64_shrd_1): Ditto.
(x86_64_shrd_ndd_1): Ditto.
(*x86_64_shrd_shld_1_nozext_nf): Ditto.
(x86_shrd): Ditto.
(x86_shrd_ndd): Ditto.
(x86_shrd_1): Ditto.
(x86_shrd_ndd_1): Ditto.
(*x86_shrd_shld_1_nozext_nf): Ditto.

Diff:
---
 gcc/config/i386/i386.md | 389 ++--
 1 file changed, 308 insertions(+), 81 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7d3543eee96..b4233ab99fe 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14553,7 +14553,7 @@
   DONE;
 })
 
-(define_insn "x86_64_shld"
+(define_insn "x86_64_shld"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
 (ior:DI (ashift:DI (match_dup 0)
  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
@@ -14563,10 +14563,9 @@
(zero_extend:TI
  (match_operand:DI 1 "register_operand" "r"))
(minus:QI (const_int 64)
- (and:QI (match_dup 2) (const_int 63 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
+ (and:QI (match_dup 2) (const_int 63 0)))]
+  "TARGET_64BIT && "
+  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
@@ -14574,7 +14573,7 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_64_shld_ndd"
+(define_insn "x86_64_shld_ndd"
   [(set (match_operand:DI 0 "register_operand" "=r")
 (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
  (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc")
@@ -14584,14 +14583,13 @@
(zero_extend:TI
  (match_operand:DI 2 "register_operand" "r"))
(minus:QI (const_int 64)
- (and:QI (match_dup 3) (const_int 63 0)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_APX_NDD"
-  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ (and:QI (match_dup 3) (const_int 63 0)))]
+  "TARGET_APX_NDD && "
+  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "ishift")
(set_attr "mode" "DI")])
 
-(define_insn "x86_64_shld_1"
+(define_insn "x86_64_shld_1"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
 (ior:DI (ashift:DI (match_dup 0)
   (match_operand:QI 2 "const_0_to_63_operand"))
@@ -14599,11 +14597,11 @@
  (lshiftrt:TI
(zero_extend:TI
  (match_operand:DI 1 "register_operand" "r"))
-   (match_operand:QI 3 "const_0_to_255_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+   (match_operand:QI 3 "const_0_to_255_operand")) 0)))]
   "TARGET_64BIT
-   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
-  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])
+   && "
+  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
@@ -14612,7 +14610,7 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
 
-(define_insn "x86_64_shld_ndd_1"
+(define_insn "x86_64_shld_ndd_1"
   [(set (match_operand:DI 0 "register_operand" "=r")
 (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm")
   (match_operand:QI 3 "const_0_to_63_operand"))
@@ -14620,15 +14618,69 @@
  (lshiftrt:TI
(zero_extend:TI
  (match_operand:DI 2 "register_operand" "r"))
-   (match_operand:QI 4 "const_0_to_255_operand")) 0)))
-   (clobber (reg:CC FLAGS_REG))]
+   (match_operand:QI 4 "const_0_to_255_operand")) 0)))]
   "TARGET_APX_NDD
-   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])"
-  "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+   && INTVAL (operands[4]) == 64 - INTVAL (operands[3])
+   && "
+ 

[gcc r15-978] [APX NF] Support APX NF for right shift insns

2024-06-03 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:0964a8f15e9b129522940ab0911b7d3fa4754f98

commit r15-978-g0964a8f15e9b129522940ab0911b7d3fa4754f98
Author: Lingling Kong 
Date:   Mon Jun 3 11:10:36 2024 +0800

[APX NF] Support APX NF for right shift insns

gcc/ChangeLog:

* config/i386/i386.md (*ashr3_1): New
define_insn.
(*lshr3_1): Ditto.
(*lshrqi3_1): Ditto.
(*lshrhi3_1): Ditto.

Diff:
---
 gcc/config/i386/i386.md | 82 +++--
 1 file changed, 46 insertions(+), 36 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8063cb576f5..f1c828a6dc3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16325,13 +16325,13 @@
   [(set_attr "type" "ishiftx")
(set_attr "mode" "")])
 
-(define_insn "*ashr3_1"
+(define_insn "*ashr3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
(ashiftrt:SWI48
  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm")
- (match_operand:QI 2 "nonmemory_operand" "c,r,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "c,r,c")))]
+  "ix86_binary_operator_ok (ASHIFTRT, mode, operands, TARGET_APX_NDD)
+   && "
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
@@ -16342,11 +16342,11 @@
 default:
   if (operands[2] == const1_rtx
  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !)
return "sar{}\t%0";
   else
-   return use_ndd ? "sar{}\t{%2, %1, %0|%0, %1, %2}"
-  : "sar{}\t{%2, %0|%0, %2}";
+   return use_ndd ? "sar{}\t{%2, %1, %0|%0, %1, 
%2}"
+  : "sar{}\t{%2, %0|%0, %2}";
 }
 }
   [(set_attr "isa" "*,bmi2,apx_ndd")
@@ -16386,14 +16386,13 @@
 }
 [(set_attr "isa" "*,*,*,apx_ndd")])
 
-
-(define_insn "*lshr3_1"
+(define_insn "*lshr3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r")
(lshiftrt:SWI48
  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm")
- (match_operand:QI 2 "nonmemory_operand" "c,r,,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "c,r,,c")))]
+  "ix86_binary_operator_ok (LSHIFTRT, mode, operands, TARGET_APX_NDD)
+   && "
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
@@ -16405,11 +16404,11 @@
 default:
   if (operands[2] == const1_rtx
  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !)
return "shr{}\t%0";
   else
-   return use_ndd ? "shr{}\t{%2, %1, %0|%0, %1, %2}"
-  : "shr{}\t{%2, %0|%0, %2}";
+   return use_ndd ? "shr{}\t{%2, %1, %0|%0, %1, 
%2}"
+  : "shr{}\t{%2, %0|%0, %2}";
 }
 }
   [(set_attr "isa" "*,bmi2,avx512bw,apx_ndd")
@@ -16425,6 +16424,17 @@
(set_attr "mode" "")])
 
 ;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; NF/NDD does not support the shift count as r; it only supports c,
+;; and the pattern has no flags clobber.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+   (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+  (match_operand:QI 2 "register_operand")))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+   (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (mode, operands[2]);")
+
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
@@ -16493,22 +16503,22 @@
(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
-(define_insn "*ashr3_1"
+(define_insn "*ashr3_1"
   [(set (match_operand:SWI12 0 "nonimmediate_operand" "=m, r")
(ashiftrt:SWI12
  (match_operand:SWI12 1 "nonimmediate_operand" "0, rm")
- (match_operand:QI 2 "nonmemory_operand" "c, c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" "c, c")))]
+  "ix86_binary_operator_ok (ASHIFTRT, mode, operands, TARGET_APX_NDD)
+   && "
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   if (operands[2] == const1_rtx
   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-  && !use_ndd)
+  && !use_ndd && !)
 return "sar{}\t%0";
   else
-return use_ndd ? "sar{}\t{%2, %1, %0|%0, %1, %2}"
-  : "sar{}\t{%2, %0|%0, %2}";
+return use_ndd ? "sar{}\t{%2, %1, %0|%0, %1, %2}"
+  : "sar{}\t{%2, %0|%0, %2}";
 }
   [(set_attr "isa" "*, 

[gcc r15-977] [APX NF] Support APX NF for left shift insns

2024-06-03 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:8e6a558d4123185962a54de53b104310e248880d

commit r15-977-g8e6a558d4123185962a54de53b104310e248880d
Author: Lingling Kong 
Date:   Mon Jun 3 11:05:41 2024 +0800

[APX NF] Support APX NF for left shift insns

gcc/ChangeLog:

* config/i386/i386.md (*ashl3_1): New
define_insn.
(*ashlhi3_1): Ditto.
(*ashlqi3_1): Ditto.
* config/i386/sse.md: New define_split.

Diff:
---
 gcc/config/i386/i386.md | 96 +++--
 gcc/config/i386/sse.md  | 13 +++
 2 files changed, 83 insertions(+), 26 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5653f5ce123..8063cb576f5 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15013,17 +15013,22 @@
   [(set_attr "type" "ishiftx")
(set_attr "mode" "")])
 
-(define_insn "*ashl3_1"
+(define_insn "*ashl3_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" 
"0,l,rm,k,rm")
- (match_operand:QI 2 "nonmemory_operand" 
"c,M,r,,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, mode, operands, TARGET_APX_NDD)"
+ (match_operand:QI 2 "nonmemory_operand" 
"c,M,r,,c")))]
+  "ix86_binary_operator_ok (ASHIFT, mode, operands, TARGET_APX_NDD)
+   && "
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
 {
 case TYPE_LEA:
+  if (TARGET_APX_NDD && )
+   return "%{nf%} sal{}\t{%2, %1, %0|%0, %1, %2}";
+  else
+   return "#";
+
 case TYPE_ISHIFTX:
 case TYPE_MSKLOG:
   return "#";
@@ -15031,7 +15036,7 @@
 case TYPE_ALU:
   gcc_assert (operands[2] == const1_rtx);
   gcc_assert (rtx_equal_p (operands[0], operands[1]));
-  return "add{}\t%0, %0";
+  return "add{}\t%0, %0";
 
 default:
   if (operands[2] == const1_rtx
@@ -15039,11 +15044,11 @@
  /* For NDD form instructions related to TARGET_SHIFT1, the $1
 immediate do not need to be omitted as assembler will map it
 to use shorter encoding. */
- && !use_ndd)
+ && !use_ndd && !)
return "sal{}\t%0";
   else
-   return use_ndd ? "sal{}\t{%2, %1, %0|%0, %1, %2}"
-  : "sal{}\t{%2, %0|%0, %2}";
+   return use_ndd ? "sal{}\t{%2, %1, %0|%0, %1, 
%2}"
+  : "sal{}\t{%2, %0|%0, %2}";
 }
 }
   [(set_attr "isa" "*,*,bmi2,avx512bw,apx_ndd")
@@ -15074,6 +15079,17 @@
(set_attr "mode" "")])
 
 ;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; NF/NDD does not support the shift count as r; it only supports c,
+;; and the pattern has no flags clobber.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+   (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+ (match_operand:QI 2 "register_operand")))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+   (ashift:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (mode, operands[2]);")
+
 (define_split
   [(set (match_operand:SWI48 0 "register_operand")
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
@@ -15160,32 +15176,37 @@
(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2]
   "operands[2] = gen_lowpart (SImode, operands[2]);")
 
-(define_insn "*ashlhi3_1"
+(define_insn "*ashlhi3_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
-  (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)"
+  (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))]
+  "ix86_binary_operator_ok (ASHIFT, HImode, operands, TARGET_APX_NDD)
+   && "
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
 {
 case TYPE_LEA:
+  if (TARGET_APX_NDD && )
+   return "%{nf%} sal{w}\t{%2, %1, %0|%0, %1, %2}";
+  else
+   return "#";
+
 case TYPE_MSKLOG:
   return "#";
 
 case TYPE_ALU:
   gcc_assert (operands[2] == const1_rtx);
-  return "add{w}\t%0, %0";
+  return "add{w}\t%0, %0";
 
 default:
   if (operands[2] == const1_rtx
  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
- && !use_ndd)
+ && !use_ndd && !)
return "sal{w}\t%0";
   else
-   return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}"
-  : "sal{w}\t{%2, %0|%0, %2}";
+   return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}"
+  : "sal{w}\t{%2, %0|%0, %2}";
 }
 }
   [(set_attr "isa" "*,*,avx512f,apx_ndd")
@@ -15213,31 +15234,36 @@
(const_string "*")))
(set_attr "mode" "HI,SI,HI,HI")])
 
-(define_insn 

[gcc r15-976] [APX NF] Support APX NF for {sub/and/or/xor/neg}

2024-06-03 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:85a70b2536883c0ffa0f72d20638e0679acd9070

commit r15-976-g85a70b2536883c0ffa0f72d20638e0679acd9070
Author: Lingling Kong 
Date:   Mon Jun 3 11:00:03 2024 +0800

[APX NF] Support APX NF for {sub/and/or/xor/neg}

gcc/ChangeLog:

* config/i386/i386.md (nf_nonf_attr): New subst_attr.
(nf_nonf_x64_attr): Ditto.
(*sub_1): New define_insn.
(*anddi_1): Ditto.
(*and_1): Ditto.
(*andqi_1): Ditto.
(*_1): Ditto.
(*qi_1): Ditto.
(*neg_1): Ditto.
* config/i386/sse.md: New define_split.

gcc/testsuite/ChangeLog:

* gcc.target/i386/apx-nf.c: New test.

Diff:
---
 gcc/config/i386/i386.md| 173 +
 gcc/config/i386/sse.md |  11 +++
 gcc/testsuite/gcc.target/i386/apx-nf.c |  12 +++
 3 files changed, 114 insertions(+), 82 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4b2d9e71c43..5653f5ce123 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -575,7 +575,7 @@
noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
-   vaes_avx512vl"
+   vaes_avx512vl,noapx_nf"
   (const_string "base"))
 
 ;; The (bounding maximum) length of an instruction immediate.
@@ -981,6 +981,7 @@
   (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
 (eq_attr "mmx_isa" "avx")
   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
+(eq_attr "isa" "noapx_nf") (symbol_ref "!TARGET_APX_NF")
]
(const_int 1)))
 
@@ -6449,6 +6450,8 @@
 (define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
 (define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
 (define_subst_attr "nf_applied" "nf_subst" "true" "false")
+(define_subst_attr "nf_nonf_attr" "nf_subst"  "noapx_nf" "*")
+(define_subst_attr "nf_nonf_x64_attr" "nf_subst" "noapx_nf" "x64")
 
 (define_subst "nf_subst"
   [(set (match_operand:SWI 0)
@@ -7895,20 +7898,21 @@
   "split_double_mode (mode, [0], 2, [0], [3]);"
 [(set_attr "isa" "*,*,apx_ndd,apx_ndd")])
 
-(define_insn "*sub_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r,r")
+(define_insn "*sub_1"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" 
"=m,r,,r,r,r")
(minus:SWI
- (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,rjM,r")
- (match_operand:SWI 2 "" ",,r,,")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)"
+ (match_operand:SWI 1 "nonimmediate_operand" "0,0,0,rm,rjM,r")
+ (match_operand:SWI 2 "" ",,,r,,")))]
+  "ix86_binary_operator_ok (MINUS, mode, operands, TARGET_APX_NDD)
+  && "
   "@
-  sub{}\t{%2, %0|%0, %2}
-  sub{}\t{%2, %0|%0, %2}
-  sub{}\t{%2, %1, %0|%0, %1, %2}
-  sub{}\t{%2, %1, %0|%0, %1, %2}
-  sub{}\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd")
+  sub{}\t{%2, %0|%0, %2}
+  sub{}\t{%2, %0|%0, %2}
+  sub{}\t{%2, %0|%0, %2}
+  sub{}\t{%2, %1, %0|%0, %1, %2}
+  sub{}\t{%2, %1, %0|%0, %1, %2}
+  sub{}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd")
(set_attr "type" "alu")
(set_attr "mode" "")])
 
@@ -11797,27 +11801,28 @@
 }
 [(set_attr "isa" "*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd_64,apx_ndd")])
 
-(define_insn "*anddi_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,r,?k")
+(define_insn "*anddi_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" 
"=r,r,rm,r,r,r,r,r,r,?k")
(and:DI
-(match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,rm,rjM,r,qm,k")
-(match_operand:DI 2 "x86_64_szext_general_operand" 
"Z,Z,re,m,r,e,m,L,k")))
-   (clobber (reg:CC FLAGS_REG))]
+(match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,0,rm,rjM,r,qm,k")
+(match_operand:DI 2 "x86_64_szext_general_operand" 
"Z,Z,r,e,m,r,e,m,L,k")))]
   "TARGET_64BIT
-   && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)"
+   && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)
+   && "
   "@
-   and{l}\t{%k2, %k0|%k0, %k2}
-   and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
-   and{q}\t{%2, %0|%0, %2}
-   and{q}\t{%2, %0|%0, %2}
-   and{q}\t{%2, %1, %0|%0, %1, %2}
-   and{q}\t{%2, %1, %0|%0, %1, %2}
-   and{q}\t{%2, %1, %0|%0, %1, %2}
+   and{l}\t{%k2, %k0|%k0, %k2}
+   and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2}
+   and{q}\t{%2, %0|%0, %2}
+   and{q}\t{%2, %0|%0, %2}
+   and{q}\t{%2, %0|%0, %2}
+   and{q}\t{%2, %1, %0|%0, %1, %2}
+   and{q}\t{%2, %1, %0|%0, %1, %2}
+   and{q}\t{%2, %1, %0|%0, %1, %2}
#
#"
-  [(set_attr "isa" "x64,apx_ndd,x64,x64,apx_ndd,apx_ndd,apx_ndd,x64,avx512bw")
-   (set_attr "type" "alu,alu,alu,alu,alu,alu,alu,imovx,msklog")
-   

[gcc r15-975] [APX NF] Support APX NF add

2024-06-03 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:2fb9363dc3596cd7fda90a8c816d7e27dd9e085b

commit r15-975-g2fb9363dc3596cd7fda90a8c816d7e27dd9e085b
Author: Lingling Kong 
Date:   Mon Jun 3 10:38:43 2024 +0800

[APX NF] Support APX NF add

The APX NF (no flags) feature suppresses the update of status flags
for arithmetic operations.

For NF add, it is not clear whether nf add can be faster than lea. If so,
the pattern needs to be adjusted to prefer lea generation.

gcc/ChangeLog:

* config/i386/i386-opts.h (enum apx_features): Add nf
enumeration.
* config/i386/i386.h (TARGET_APX_NF): New.
* config/i386/i386.md (nf_name): New subst_attr.
(nf_prefix): Ditto.
(nf_condition): Ditto.
(nf_mem_constraint): Ditto.
(nf_applied): Ditto.
(nf_subst): Add new define_subst.
(*add_1): New define_insn.
(*addhi_1): Ditto.
(*addqi_1): Ditto.
* config/i386/i386.opt: Add apx_nf enumeration.

gcc/testsuite/ChangeLog:

* gcc.target/i386/apx-ndd.c: Fixed test.

Co-authored-by: Hongyu Wong 

Diff:
---
 gcc/config/i386/i386-opts.h |   3 +-
 gcc/config/i386/i386.h  |   1 +
 gcc/config/i386/i386.md | 137 ++--
 gcc/config/i386/i386.opt|   3 +
 gcc/testsuite/gcc.target/i386/apx-ndd.c |   2 +-
 5 files changed, 100 insertions(+), 46 deletions(-)

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index ef2825803b3..60176ce609f 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -140,7 +140,8 @@ enum apx_features {
   apx_push2pop2 = 1 << 1,
   apx_ndd = 1 << 2,
   apx_ppx = 1 << 3,
-  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx,
+  apx_nf = 1<< 4,
+  apx_all = apx_egpr | apx_push2pop2 | apx_ndd | apx_ppx | apx_nf,
 };
 
 #endif
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 359a8408263..969391d3013 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 #define TARGET_APX_PUSH2POP2 (ix86_apx_features & apx_push2pop2)
 #define TARGET_APX_NDD (ix86_apx_features & apx_ndd)
 #define TARGET_APX_PPX (ix86_apx_features & apx_ppx)
+#define TARGET_APX_NF (ix86_apx_features & apx_nf)
 
 #include "config/vxworks-dummy.h"
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 375654cf74e..4b2d9e71c43 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6290,6 +6290,13 @@
   [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
   (clobber (reg:CC FLAGS_REG))])]
   "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_split
+  [(set (match_operand:SWI48 0 "general_reg_operand")
+   (mult:SWI48 (match_dup 0) (match_operand:SWI48 1 "const1248_operand")))]
+  "TARGET_APX_NF && reload_completed"
+  [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))]
+  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
 
 ;; Add instructions
 
@@ -6437,48 +6444,65 @@
  (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (mode, [0], 1, [0], [5]);")
 
-(define_insn "*add_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r")
+(define_subst_attr "nf_name" "nf_subst" "_nf" "")
+(define_subst_attr "nf_prefix" "nf_subst" "%{nf%} " "")
+(define_subst_attr "nf_condition" "nf_subst" "TARGET_APX_NF" "true")
+(define_subst_attr "nf_mem_constraint" "nf_subst" "je" "m")
+(define_subst_attr "nf_applied" "nf_subst" "true" "false")
+
+(define_subst "nf_subst"
+  [(set (match_operand:SWI 0)
+   (match_operand:SWI 1))]
+  ""
+  [(set (match_dup 0)
+   (match_dup 1))
+   (clobber (reg:CC FLAGS_REG))])
+
+(define_insn "*add_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" 
"=rm,r,r,r,r,r,r,r")
(plus:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rje,jM,r")
- (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,BM")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)"
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,0,r,r,rje,jM,r")
+ (match_operand:SWI48 2 "x86_64_general_operand" 
"r,e,BM,0,le,r,e,BM")))]
+  "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)
+  && "
 {
   bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
   switch (get_attr_type (insn))
 {
 case TYPE_LEA:
-  return "#";
+  if (TARGET_APX_NDD && )
+   return "%{nf%} add{}\t{%2, %1, %0|%0, %1, %2}";
+  else
+   return "#";
 
 case TYPE_INCDEC:
   if (operands[2] == const1_rtx)
-return use_ndd ? "inc{}\t{%1, %0|%0, %1}"
- : "inc{}\t%0";
+   return use_ndd ? "inc{}\t{%1, %0|%0, %1}"
+   

[gcc r15-334] i386: fix ix86_hardreg_mov_ok with lra_in_progress

2024-05-08 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:baf1a677955a4dcfffe8d93966900af96600d642

commit r15-334-gbaf1a677955a4dcfffe8d93966900af96600d642
Author: konglin1 
Date:   Thu May 9 09:48:56 2024 +0800

i386: fix ix86_hardreg_mov_ok with lra_in_progress

Originally eliminate_regs_in_insn will transform
(parallel [
  (set (reg:QI 130)
(plus:QI (subreg:QI (reg:DI 19 frame) 0)
  (const_int 96)))
  (clobber (reg:CC 17 flag))]) {*addqi_1}
to
(set (reg:QI 130)
 (subreg:QI (reg:DI 19 frame) 0)) {*movqi_internal}
when verify_changes.

But with No Flags add, it transforms
(set (reg:QI 5 di)
  (plus:QI (subreg:QI (reg:DI 19 frame) 0)
   (const_int 96))) {*addqi_1_nf}
to
(set (reg:QI 5 di)
 (subreg:QI (reg:DI 19 frame) 0)) {*addqi_1_nf}.
There are no extra clobbers at the end, and
its dest reg is just a hardreg. For ix86_hardreg_mov_ok,
it returns false. So it fails to update the insn and causes
an ICE when transforming to movqi_internal.

But actually it is ok and safe for ix86_hardreg_mov_ok
when lra_in_progress.

Tested with SPEC2017; the performance was not affected.

gcc/ChangeLog:

* config/i386/i386.cc (ix86_hardreg_mov_ok): Relax
hard reg mov restriction when lra in progress.

Diff:
---
 gcc/config/i386/i386.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index c2df4ab91ee9..54c6c445bf14 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20355,7 +20355,8 @@ ix86_hardreg_mov_ok (rtx dst, rtx src)
   ? standard_sse_constant_p (src, GET_MODE (dst))
   : x86_64_immediate_operand (src, GET_MODE (dst)))
   && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
-  && !reload_completed)
+  && !reload_completed
+  && !lra_in_progress)
 return false;
   return true;
 }


[gcc r15-313] x86: Fix cmov cost model issue [PR109549]

2024-05-08 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:d826f7945609046f922732b138fb90795d5b1985

commit r15-313-gd826f7945609046f922732b138fb90795d5b1985
Author: konglin1 
Date:   Wed May 8 15:46:10 2024 +0800

x86: Fix cmov cost model issue [PR109549]

(if_then_else:SI (eq (reg:CCZ 17 flags)
(const_int 0 [0]))
(reg/v:SI 101 [ e ])
(reg:SI 102))
The cost is 8 for the rtx, the cost for
(eq (reg:CCZ 17 flags) (const_int 0 [0])) is 4,
but this is just an operator, so there is no need to compute its cost in cmov.

gcc/ChangeLog:

PR target/109549
* config/i386/i386.cc (ix86_rtx_costs): The XEXP (x, 0) for cmov
is an operator; do not compute its cost.

gcc/testsuite/ChangeLog:

* gcc.target/i386/cmov6.c: Fixed.

Diff:
---
 gcc/config/i386/i386.cc   | 2 +-
 gcc/testsuite/gcc.target/i386/cmov6.c | 5 +
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index e58335adc8f0..c2df4ab91ee9 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22237,7 +22237,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
{
  /* cmov.  */
  *total = COSTS_N_INSNS (1);
- if (!REG_P (XEXP (x, 0)))
+ if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
*total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
  if (!REG_P (XEXP (x, 1)))
*total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
diff --git a/gcc/testsuite/gcc.target/i386/cmov6.c 
b/gcc/testsuite/gcc.target/i386/cmov6.c
index 5111c8a90995..535326e4c2a3 100644
--- a/gcc/testsuite/gcc.target/i386/cmov6.c
+++ b/gcc/testsuite/gcc.target/i386/cmov6.c
@@ -1,9 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=k8" } */
-/* if-converting this sequence would require two cmov
-   instructions and seems to always cost more independent
-   of the TUNE_ONE_IF_CONV setting.  */
-/* { dg-final { scan-assembler-not "cmov\[^6\]" } } */
+/* { dg-final { scan-assembler "cmov\[^6\]" } } */
 
 /* Verify that blocks are converted to conditional moves.  */
 extern int bar (int, int);