On 19/5/26 18:22, James Hilliard wrote:
From: Richard Henderson <[email protected]>
Add a helper for multi-limb 64-bit addition. The helper emits native
carry-chain TCG ops when they are available and falls back to explicit
carry propagation otherwise.
This lets target translators build wider integer accumulators inline
without open-coding the same add-with-carry sequence at each use site.
Signed-off-by: Richard Henderson <[email protected]>
---
Changes v7 -> v8:
- New patch from Richard Henderson's v7.5 multiplier rework.
---
include/tcg/tcg-op-common.h | 1 +
tcg/tcg-op.c | 42 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+)
Richard, could we squash something like this?
-- >8 --
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst
index fd3a50bf4c4..1ac53a61114 100644
--- a/docs/devel/tcg-ops.rst
+++ b/docs/devel/tcg-ops.rst
@@ -672,6 +672,12 @@ Multiword arithmetic support
| If mulu2/muls2 are not provided by the backend, the tcg-op
generator
can obtain the same results by emitting a pair of opcodes,
mul + muluh/mulsh.
+ * - addN *n*, *t0*, *t1*, *t2*
+
+ - | For each of the *n* limbs, compute *t0* = *t1* + *t2* + *C*,
+ where *C* is the input carry bit provided by the host
+ architecture, and also compute the output carry bit.
+ Overlap between the *t0* and *t1*/*t2* arrays is not allowed.
Memory Barrier support
----------------------
---
diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
index e02f209c09..ee0ad5f6a3 100644
--- a/include/tcg/tcg-op-common.h
+++ b/include/tcg/tcg-op-common.h
@@ -251,6 +251,7 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co,
TCGv_i64 a, TCGv_i64 b, TCGv_i64 ci);
+void tcg_gen_addN_i64(int n, TCGv_i64 *r, TCGv_i64 *a, TCGv_i64 *b);
void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64
arg2);
void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64
arg2);
void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64
arg2);
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index d8ae57d604..28ef5bacfd 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2275,6 +2275,48 @@ void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co,
}
}
+/* Emit r = a + b across n 64-bit limbs; limb 0 is the least significant. */
+void tcg_gen_addN_i64(int n, TCGv_i64 *r, TCGv_i64 *a, TCGv_i64 *b)
+{
+    const int last = n - 1;
+    tcg_debug_assert(n > 2);
+
+    /* For now, an output limb may not alias an input limb read later. */
+    for (int lo = 0; lo < last; ++lo) {
+        for (int hi = lo + 1; hi < n; ++hi) {
+            tcg_debug_assert(r[lo] != a[hi]);
+            tcg_debug_assert(r[lo] != b[hi]);
+        }
+    }
+
+    if (!tcg_op_supported(INDEX_op_addci, TCG_TYPE_I64, 0)) {
+        /* No carry-chain opcodes: keep the carry in a temporary. */
+        TCGv_i64 sum = tcg_temp_ebb_new_i64();
+        TCGv_i64 carry = tcg_temp_ebb_new_i64();
+
+        tcg_gen_add_i64(sum, a[0], b[0]);
+        tcg_gen_setcond_i64(TCG_COND_LTU, carry, sum, a[0]);
+        tcg_gen_mov_i64(r[0], sum);
+        for (int i = 1; i < last; ++i) {
+            tcg_gen_add_i64(sum, a[i], carry);
+            tcg_gen_setcond_i64(TCG_COND_LTU, carry, sum, carry);
+            tcg_gen_add_i64(r[i], b[i], sum);
+            tcg_gen_setcond_i64(TCG_COND_LTU, sum, r[i], sum);
+            tcg_gen_or_i64(carry, carry, sum);
+        }
+        tcg_gen_add_i64(r[last], a[last], b[last]);
+        tcg_gen_add_i64(r[last], r[last], carry);
+        tcg_temp_free_i64(sum);
+        tcg_temp_free_i64(carry);
+        return;
+    }
+    tcg_gen_op3_i64(INDEX_op_addco, r[0], a[0], b[0]);
+    for (int i = 1; i < last; ++i) {
+        tcg_gen_op3_i64(INDEX_op_addcio, r[i], a[i], b[i]);
+    }
+    tcg_gen_op3_i64(INDEX_op_addci, r[last], a[last], b[last]);
+}
+
void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
{