From: Richard Henderson <[email protected]>

Add a helper for multi-limb 64-bit addition. The helper emits native
carry-chain TCG ops when they are available and falls back to explicit
carry propagation otherwise.
This lets target translators build wider integer accumulators inline
without open-coding the same add-with-carry sequence at each use site.

Signed-off-by: Richard Henderson <[email protected]>
Signed-off-by: James Hilliard <[email protected]>
Signed-off-by: Philippe Mathieu-Daudé <[email protected]>
---
 include/tcg/tcg-op-common.h |  1 +
 tcg/tcg-op.c                | 42 +++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/include/tcg/tcg-op-common.h b/include/tcg/tcg-op-common.h
index e02f209c093..ee0ad5f6a3f 100644
--- a/include/tcg/tcg-op-common.h
+++ b/include/tcg/tcg-op-common.h
@@ -251,6 +251,7 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh);
 void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co,
                         TCGv_i64 a, TCGv_i64 b, TCGv_i64 ci);
+void tcg_gen_addN_i64(int n, TCGv_i64 *r, TCGv_i64 *a, TCGv_i64 *b);
 void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2);
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index d8ae57d6047..28ef5bacfdf 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2275,6 +2275,48 @@ void tcg_gen_addcio_i64(TCGv_i64 r, TCGv_i64 co,
     }
 }
 
+void tcg_gen_addN_i64(int n, TCGv_i64 *r, TCGv_i64 *a, TCGv_i64 *b)
+{ /* r[] = a[] + b[] as n-limb integers; index 0 is the least significant. */
+    tcg_debug_assert(n > 2); /* callers must pass at least three limbs */
+
+    /* ??? No overlap for now: r[i] must not alias a[j] or b[j] for j > i. */
+    for (int i = 0; i < n - 1; ++i) {
+        for (int j = i + 1; j < n; ++j) {
+            tcg_debug_assert(r[i] != a[j]);
+            tcg_debug_assert(r[i] != b[j]);
+        }
+    }
+
+    if (tcg_op_supported(INDEX_op_addci, TCG_TYPE_I64, 0)) { /* native path */
+        tcg_gen_op3_i64(INDEX_op_addco, r[0], a[0], b[0]); /* starts chain */
+        for (int i = 1; i < n - 1; ++i) { /* middle limbs pass carry on */
+            tcg_gen_op3_i64(INDEX_op_addcio, r[i], a[i], b[i]);
+        }
+        tcg_gen_op3_i64(INDEX_op_addci, r[n - 1], a[n - 1], b[n - 1]);
+    } else { /* fallback: carry is tracked in an ordinary temp */
+        TCGv_i64 t = tcg_temp_ebb_new_i64(); /* scratch */
+        TCGv_i64 c = tcg_temp_ebb_new_i64(); /* running carry */
+
+        tcg_gen_add_i64(t, a[0], b[0]);
+        tcg_gen_setcond_i64(TCG_COND_LTU, c, t, a[0]); /* c = (sum < a[0]) */
+        tcg_gen_mov_i64(r[0], t); /* r[0] may alias a[0], so go via t */
+
+        for (int i = 1; i < n - 1; ++i) {
+            tcg_gen_add_i64(t, a[i], c); /* t = a[i] + carry-in */
+            tcg_gen_setcond_i64(TCG_COND_LTU, c, t, c); /* wrapped? */
+            tcg_gen_add_i64(r[i], b[i], t); /* r[i] = b[i] + t */
+            tcg_gen_setcond_i64(TCG_COND_LTU, t, r[i], t); /* wrapped? */
+            tcg_gen_or_i64(c, c, t); /* carry-out if either add wrapped */
+        }
+
+        tcg_gen_add_i64(r[n - 1], a[n - 1], b[n - 1]);
+        tcg_gen_add_i64(r[n - 1], r[n - 1], c); /* no carry out of top */
+
+        tcg_temp_free_i64(t);
+        tcg_temp_free_i64(c);
+    }
+}
+
 void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al,
                       TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh)
 {
-- 
2.53.0
