On 19/5/26 18:22, James Hilliard wrote:
From: Richard Henderson <[email protected]>

Add a helper for multi-limb 64-bit addition. The helper emits native
carry-chain TCG ops when they are available and falls back to explicit
carry propagation otherwise.

This lets target translators build wider integer accumulators inline
without open-coding the same add-with-carry sequence at each use site.

Signed-off-by: Richard Henderson <[email protected]>

---
Changes v7 -> v8:
   - New patch from Richard Henderson's v7.5 multiplier rework.
---
  include/tcg/tcg-op-common.h |  1 +
  tcg/tcg-op.c                | 42 ++++++++++++++++++++++++++++++++++++++++++
  2 files changed, 43 insertions(+)

Richard, could we squash something like this?

-- >8 --
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst
index fd3a50bf4c4..1ac53a61114 100644
--- a/docs/devel/tcg-ops.rst
+++ b/docs/devel/tcg-ops.rst
@@ -672,6 +672,12 @@ Multiword arithmetic support
| If mulu2/muls2 are not provided by the backend, the tcg-op generator can obtain the same results by emitting a pair of opcodes, mul + muluh/mulsh.

+   * - addN *n*, *t0*, *t1*, *t2*
+
+     - | Add the *n*-word integers *t1* and *t2* into *t0*, least significant
+         word first, propagating the carry out of each word into the next.
+         The first word has no carry-in; the last carry-out is discarded.
+         Overlap between *t0* and *t1*/*t2* arrays is not allowed.

 Memory Barrier support
 ----------------------
---

diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
index e02f209c09..ee0ad5f6a3 100644
--- a/include/tcg/tcg-op-common.h
+++ b/include/tcg/tcg-op-common.h
@@ -251,6 +251,7 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
                        TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
  void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co,
                          TCGv_i64 a, TCGv_i64 b, TCGv_i64 ci);
+void tcg_gen_addN_i64(int n, TCGv_i64 *r, TCGv_i64 *a, TCGv_i64 *b);
  void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
  void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
  void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index d8ae57d604..28ef5bacfd 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2275,6 +2275,48 @@ void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co,
      }
  }
+/*
+ * Emit ops computing the multi-limb sum r = a + b.
+ *
+ * @n: number of i64 limbs in each operand; must be greater than 2.
+ * @r: array of @n output limbs.
+ * @a: array of @n input limbs.
+ * @b: array of @n input limbs.
+ *
+ * Limbs are processed from index 0 up to @n - 1, with the carry out of
+ * each limb feeding the next one.  Limb 0 takes no carry-in, and no
+ * carry-out is produced for limb @n - 1.
+ */
+void tcg_gen_addN_i64(int n, TCGv_i64 *r, TCGv_i64 *a, TCGv_i64 *b)
+{
+    tcg_debug_assert(n > 2);
+
+    /* ??? Don't allow overlap for now. */
+    /*
+     * What is actually checked: no output r[i] may be the same temp as a
+     * higher-indexed input a[j] or b[j] (j > i).  r[i] is written before
+     * those inputs are read, so such aliasing would corrupt the sum.
+     */
+    for (int i = 0; i < n - 1; ++i) {
+        for (int j = i + 1; j < n; ++j) {
+            tcg_debug_assert(r[i] != a[j]);
+            tcg_debug_assert(r[i] != b[j]);
+        }
+    }
+
+    if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I64, 0)) {
+        /*
+         * Native carry chain: carry-out only for limb 0, carry-in and
+         * carry-out for the middle limbs, carry-in only for the last.
+         */
+        tcg_gen_op3_i64(INDEX_op_addco, r[0], a[0], b[0]);
+        for (int i = 1; i < n - 1; ++i) {
+            tcg_gen_op3_i64(INDEX_op_addcio, r[i], a[i], b[i]);
+        }
+        tcg_gen_op3_i64(INDEX_op_addci, r[n - 1], a[n - 1], b[n - 1]);
+    } else {
+        /* Fallback: track the carry explicitly in c (0 or 1). */
+        TCGv_i64 t = tcg_temp_ebb_new_i64();
+        TCGv_i64 c = tcg_temp_ebb_new_i64();
+
+        /*
+         * Limb 0: the unsigned sum wrapped iff it is less than an addend.
+         * The sum goes through t before landing in r[0], presumably so
+         * the compare still sees a[0] when r[0] is the same temp.
+         */
+        tcg_gen_add_i64(t, a[0], b[0]);
+        tcg_gen_setcond_i64(TCG_COND_LTU, c, t, a[0]);
+        tcg_gen_mov_i64(r[0], t);
+
+        for (int i = 1; i < n - 1; ++i) {
+            /* Step 1: t = a[i] + carry-in; c = 1 if that add wrapped. */
+            tcg_gen_add_i64(t, a[i], c);
+            tcg_gen_setcond_i64(TCG_COND_LTU, c, t, c);
+            /* Step 2: r[i] = b[i] + t; t = 1 if that add wrapped. */
+            tcg_gen_add_i64(r[i], b[i], t);
+            tcg_gen_setcond_i64(TCG_COND_LTU, t, r[i], t);
+            /* Carry-out is set if either partial add wrapped. */
+            tcg_gen_or_i64(c, c, t);
+        }
+
+        /* Last limb: add both inputs and the carry-in; no carry-out. */
+        tcg_gen_add_i64(r[n - 1], a[n - 1], b[n - 1]);
+        tcg_gen_add_i64(r[n - 1], r[n - 1], c);
+
+        tcg_temp_free_i64(t);
+        tcg_temp_free_i64(c);
+    }
+}
+
  void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
                        TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
  {



Reply via email to