Hi All,
GCC 15 added two new fusions CMP+CSEL and CMP+CSET.
This patch enables them for cores that support based on their Software
Optimization Guides and generically on Armv9-A. Even if a core does not
support it there's no negative performance impact.
Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64-fusion-pairs.def (AARCH64_FUSE_NEOVERSE_BASE):
New.
* config/aarch64/tuning_models/cortexx925.h: Use it.
* config/aarch64/tuning_models/generic_armv9_a.h: Use it.
* config/aarch64/tuning_models/neoverse512tvb.h: Use it.
* config/aarch64/tuning_models/neoversen2.h: Use it.
* config/aarch64/tuning_models/neoversen3.h: Use it.
* config/aarch64/tuning_models/neoversev1.h: Use it.
* config/aarch64/tuning_models/neoversev2.h: Use it.
* config/aarch64/tuning_models/neoversev3.h: Use it.
* config/aarch64/tuning_models/neoversev3ae.h: Use it.
---
diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def
b/gcc/config/aarch64/aarch64-fusion-pairs.def
index
f8413ab0c802c28290ebcc171bfd131622cb33be..0123430d988b96b20d20376df9ae7a196031286d
100644
--- a/gcc/config/aarch64/aarch64-fusion-pairs.def
+++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
@@ -45,4 +45,8 @@ AARCH64_FUSION_PAIR ("cmp+cset", CMP_CSET)
/* Baseline fusion settings suitable for all cores. */
#define AARCH64_FUSE_BASE (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC)
+/* Baseline fusion settings suitable for all Neoverse cores. */
+#define AARCH64_FUSE_NEOVERSE_BASE (AARCH64_FUSE_BASE | AARCH64_FUSE_CMP_CSEL \
+ | AARCH64_FUSE_CMP_CSET)
+
#define AARCH64_FUSE_MOVK (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_MOVK_MOVK)
diff --git a/gcc/config/aarch64/tuning_models/cortexx925.h
b/gcc/config/aarch64/tuning_models/cortexx925.h
index
eb9b89984b0472858bc08dba924c962ec4ba53bd..b3ae1576ade1f701b775496def277230e193d20f
100644
--- a/gcc/config/aarch64/tuning_models/cortexx925.h
+++ b/gcc/config/aarch64/tuning_models/cortexx925.h
@@ -205,7 +205,7 @@ static const struct tune_params cortexx925_tunings =
2 /* store_pred. */
}, /* memmov_cost. */
10, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/generic_armv9_a.h
b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
index
48353a59939d84647c6981d6d0551af7ce9df751..e971e645dfc4b805b7f994a15a5df7803ff4dc6c
100644
--- a/gcc/config/aarch64/tuning_models/generic_armv9_a.h
+++ b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
@@ -236,7 +236,7 @@ static const struct tune_params generic_armv9_a_tunings =
1 /* store_pred. */
}, /* memmov_cost. */
3, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoverse512tvb.h
b/gcc/config/aarch64/tuning_models/neoverse512tvb.h
index
c407b89a22f1aecbfd594b493be4fbaf1f9b0437..f72505918f3aa64200aa596dbe2d7d4a3de9c08c
100644
--- a/gcc/config/aarch64/tuning_models/neoverse512tvb.h
+++ b/gcc/config/aarch64/tuning_models/neoverse512tvb.h
@@ -143,7 +143,7 @@ static const struct tune_params neoverse512tvb_tunings =
1 /* store_pred. */
}, /* memmov_cost. */
3, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h
b/gcc/config/aarch64/tuning_models/neoversen2.h
index
18199ac206c6cbfcef8695b497401b78a8f77f38..47d861232951f92bebbd09fbca8b1ff8624949fc
100644
--- a/gcc/config/aarch64/tuning_models/neoversen2.h
+++ b/gcc/config/aarch64/tuning_models/neoversen2.h
@@ -205,7 +205,7 @@ static const struct tune_params neoversen2_tunings =
1 /* store_pred. */
}, /* memmov_cost. */
5, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversen3.h
b/gcc/config/aarch64/tuning_models/neoversen3.h
index
4da85cfac0d185a5d59439f6d19d90ace0354e8f..356b5f8cc83b69e74ddac70db7a3883bc1b7a29b
100644
--- a/gcc/config/aarch64/tuning_models/neoversen3.h
+++ b/gcc/config/aarch64/tuning_models/neoversen3.h
@@ -205,7 +205,7 @@ static const struct tune_params neoversen3_tunings =
2 /* store_pred. */
}, /* memmov_cost. */
5, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h
b/gcc/config/aarch64/tuning_models/neoversev1.h
index
dd9120eee48ae4fa30d589173788389c2413cdfe..9698f135a4f86a74c19cc601508ca9cb948f9298
100644
--- a/gcc/config/aarch64/tuning_models/neoversev1.h
+++ b/gcc/config/aarch64/tuning_models/neoversev1.h
@@ -214,7 +214,7 @@ static const struct tune_params neoversev1_tunings =
1 /* store_pred. */
}, /* memmov_cost. */
3, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h
b/gcc/config/aarch64/tuning_models/neoversev2.h
index
1369de73991cb9413ea3ea8fb292b7589420bab2..7e812d26556690f2e46e86fabcf5f63144370ac0
100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -218,7 +218,7 @@ static const struct tune_params neoversev2_tunings =
2 /* store_pred. */
}, /* memmov_cost. */
5, /* issue_rate */
- (AARCH64_FUSE_BASE | AARCH64_FUSE_CMP_CSEL | AARCH64_FUSE_CMP_CSET), /*
fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev3.h
b/gcc/config/aarch64/tuning_models/neoversev3.h
index
d8c82255378c81d4d2437412d41f3cbbd456f9c3..f100d484e6b2f1e63b5aedeee14bddcd29c917c9
100644
--- a/gcc/config/aarch64/tuning_models/neoversev3.h
+++ b/gcc/config/aarch64/tuning_models/neoversev3.h
@@ -205,7 +205,7 @@ static const struct tune_params neoversev3_tunings =
2 /* store_pred. */
}, /* memmov_cost. */
10, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev3ae.h
b/gcc/config/aarch64/tuning_models/neoversev3ae.h
index
7f050501ede7108e1109ea0b5efae355380d07dc..1f59865ceadb37817def0529bdcc4b3530fef99a
100644
--- a/gcc/config/aarch64/tuning_models/neoversev3ae.h
+++ b/gcc/config/aarch64/tuning_models/neoversev3ae.h
@@ -205,7 +205,7 @@ static const struct tune_params neoversev3ae_tunings =
2 /* store_pred. */
}, /* memmov_cost. */
10, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
--
diff --git a/gcc/config/aarch64/aarch64-fusion-pairs.def b/gcc/config/aarch64/aarch64-fusion-pairs.def
index f8413ab0c802c28290ebcc171bfd131622cb33be..0123430d988b96b20d20376df9ae7a196031286d 100644
--- a/gcc/config/aarch64/aarch64-fusion-pairs.def
+++ b/gcc/config/aarch64/aarch64-fusion-pairs.def
@@ -45,4 +45,8 @@ AARCH64_FUSION_PAIR ("cmp+cset", CMP_CSET)
/* Baseline fusion settings suitable for all cores. */
#define AARCH64_FUSE_BASE (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC)
+/* Baseline fusion settings suitable for all Neoverse cores. */
+#define AARCH64_FUSE_NEOVERSE_BASE (AARCH64_FUSE_BASE | AARCH64_FUSE_CMP_CSEL \
+ | AARCH64_FUSE_CMP_CSET)
+
#define AARCH64_FUSE_MOVK (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_MOVK_MOVK)
diff --git a/gcc/config/aarch64/tuning_models/cortexx925.h b/gcc/config/aarch64/tuning_models/cortexx925.h
index eb9b89984b0472858bc08dba924c962ec4ba53bd..b3ae1576ade1f701b775496def277230e193d20f 100644
--- a/gcc/config/aarch64/tuning_models/cortexx925.h
+++ b/gcc/config/aarch64/tuning_models/cortexx925.h
@@ -205,7 +205,7 @@ static const struct tune_params cortexx925_tunings =
2 /* store_pred. */
}, /* memmov_cost. */
10, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/generic_armv9_a.h b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
index 48353a59939d84647c6981d6d0551af7ce9df751..e971e645dfc4b805b7f994a15a5df7803ff4dc6c 100644
--- a/gcc/config/aarch64/tuning_models/generic_armv9_a.h
+++ b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
@@ -236,7 +236,7 @@ static const struct tune_params generic_armv9_a_tunings =
1 /* store_pred. */
}, /* memmov_cost. */
3, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoverse512tvb.h b/gcc/config/aarch64/tuning_models/neoverse512tvb.h
index c407b89a22f1aecbfd594b493be4fbaf1f9b0437..f72505918f3aa64200aa596dbe2d7d4a3de9c08c 100644
--- a/gcc/config/aarch64/tuning_models/neoverse512tvb.h
+++ b/gcc/config/aarch64/tuning_models/neoverse512tvb.h
@@ -143,7 +143,7 @@ static const struct tune_params neoverse512tvb_tunings =
1 /* store_pred. */
}, /* memmov_cost. */
3, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h b/gcc/config/aarch64/tuning_models/neoversen2.h
index 18199ac206c6cbfcef8695b497401b78a8f77f38..47d861232951f92bebbd09fbca8b1ff8624949fc 100644
--- a/gcc/config/aarch64/tuning_models/neoversen2.h
+++ b/gcc/config/aarch64/tuning_models/neoversen2.h
@@ -205,7 +205,7 @@ static const struct tune_params neoversen2_tunings =
1 /* store_pred. */
}, /* memmov_cost. */
5, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversen3.h b/gcc/config/aarch64/tuning_models/neoversen3.h
index 4da85cfac0d185a5d59439f6d19d90ace0354e8f..356b5f8cc83b69e74ddac70db7a3883bc1b7a29b 100644
--- a/gcc/config/aarch64/tuning_models/neoversen3.h
+++ b/gcc/config/aarch64/tuning_models/neoversen3.h
@@ -205,7 +205,7 @@ static const struct tune_params neoversen3_tunings =
2 /* store_pred. */
}, /* memmov_cost. */
5, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h b/gcc/config/aarch64/tuning_models/neoversev1.h
index dd9120eee48ae4fa30d589173788389c2413cdfe..9698f135a4f86a74c19cc601508ca9cb948f9298 100644
--- a/gcc/config/aarch64/tuning_models/neoversev1.h
+++ b/gcc/config/aarch64/tuning_models/neoversev1.h
@@ -214,7 +214,7 @@ static const struct tune_params neoversev1_tunings =
1 /* store_pred. */
}, /* memmov_cost. */
3, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index 1369de73991cb9413ea3ea8fb292b7589420bab2..7e812d26556690f2e46e86fabcf5f63144370ac0 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -218,7 +218,7 @@ static const struct tune_params neoversev2_tunings =
2 /* store_pred. */
}, /* memmov_cost. */
5, /* issue_rate */
- (AARCH64_FUSE_BASE | AARCH64_FUSE_CMP_CSEL | AARCH64_FUSE_CMP_CSET), /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev3.h b/gcc/config/aarch64/tuning_models/neoversev3.h
index d8c82255378c81d4d2437412d41f3cbbd456f9c3..f100d484e6b2f1e63b5aedeee14bddcd29c917c9 100644
--- a/gcc/config/aarch64/tuning_models/neoversev3.h
+++ b/gcc/config/aarch64/tuning_models/neoversev3.h
@@ -205,7 +205,7 @@ static const struct tune_params neoversev3_tunings =
2 /* store_pred. */
}, /* memmov_cost. */
10, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev3ae.h b/gcc/config/aarch64/tuning_models/neoversev3ae.h
index 7f050501ede7108e1109ea0b5efae355380d07dc..1f59865ceadb37817def0529bdcc4b3530fef99a 100644
--- a/gcc/config/aarch64/tuning_models/neoversev3ae.h
+++ b/gcc/config/aarch64/tuning_models/neoversev3ae.h
@@ -205,7 +205,7 @@ static const struct tune_params neoversev3ae_tunings =
2 /* store_pred. */
}, /* memmov_cost. */
10, /* issue_rate */
- AARCH64_FUSE_BASE, /* fusible_ops */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
"32:16", /* function_align. */
"4", /* jump_align. */
"32:16", /* loop_align. */