 From ece9611d080f0c048cef6e413f4a5e51683ec177 Mon Sep 17 00:00:00 2001
From: Joel Jakobsson <github@compiler.org>
Date: Tue, 2 Jul 2024 17:54:25 +0200
Subject: [PATCH] numeric: Simplified fast-path computation for mul_var()

---
 src/backend/utils/adt/numeric.c | 100 ++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 5510a203b0..0fb217f6e9 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -8747,6 +8747,106 @@ mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
 		return;
 	}
 
+	/*
+	 * Simplified fast-path computation, if var1 has just one or two digits.
+	 * This is significantly faster, since it avoids allocating a separate
+	 * digit array, making multiple passes over var2, and having separate
+	 * carry-propagation passes.
+	 */
+	if (var1ndigits <= 3)
+	{
+		NumericDigit *res_buf;
+
+		/* Allocate result digit array */
+		res_buf = digitbuf_alloc(res_ndigits);
+		res_buf[0] = 0;				/* spare digit for later rounding */
+		res_digits = res_buf + 1;
+
+		/*
+		 * Compute the result digits directly, in one pass, propagating the
+		 * carry up as we go.
+		 */
+		switch (var1ndigits)
+		{
+			case 1:
+				carry = 0;
+				for (i = res_ndigits - 3; i >= 0; i--)
+				{
+					newdig = (int) var1digits[0] * var2digits[i] + carry;
+					res_digits[i + 1] = (NumericDigit) (newdig % NBASE);
+					carry = newdig / NBASE;
+				}
+				res_digits[0] = (NumericDigit) carry;
+				break;
+
+			case 2:
+				newdig = (int) var1digits[1] * var2digits[res_ndigits - 4];
+				if (res_ndigits - 3 < var2ndigits)
+					newdig += (int) var1digits[0] * var2digits[res_ndigits - 3];
+				res_digits[res_ndigits - 2] = (NumericDigit) (newdig % NBASE);
+				carry = newdig / NBASE;
+				for (i = res_ndigits - 4; i >= 1; i--)
+				{
+					newdig = (int) var1digits[0] * var2digits[i] +
+						(int) var1digits[1] * var2digits[i - 1] + carry;
+					res_digits[i + 1] = (NumericDigit) (newdig % NBASE);
+					carry = newdig / NBASE;
+				}
+				newdig = (int) var1digits[0] * var2digits[0] + carry;
+				res_digits[1] = (NumericDigit) (newdig % NBASE);
+				res_digits[0] = (NumericDigit) (newdig / NBASE);
+				break;
+
+			case 3:
+				newdig = (int) var1digits[2] * var2digits[res_ndigits - 5];
+				if (res_ndigits - 4 < var2ndigits)
+					newdig += (int) var1digits[1] * var2digits[res_ndigits - 4];
+				if (res_ndigits - 3 < var2ndigits)
+					newdig += (int) var1digits[0] * var2digits[res_ndigits - 3];
+				res_digits[res_ndigits - 2] = (NumericDigit) (newdig % NBASE);
+				carry = newdig / NBASE;
+				for (i = res_ndigits - 4; i >= 2; i--)
+				{
+					newdig = carry;
+					if (i < var2ndigits)
+						newdig += (int) var1digits[0] * var2digits[i];
+					if (i - 1 >= 0 && i - 1 < var2ndigits)
+						newdig += (int) var1digits[1] * var2digits[i - 1];
+					if (i - 2 >= 0 && i - 2 < var2ndigits)
+						newdig += (int) var1digits[2] * var2digits[i - 2];
+					res_digits[i + 1] = (NumericDigit) (newdig % NBASE);
+					carry = newdig / NBASE;
+				}
+				newdig = carry;
+				if (var2ndigits > 1)
+					newdig += (int) var1digits[0] * var2digits[1];
+				if (var2ndigits > 0)
+					newdig += (int) var1digits[1] * var2digits[0];
+				res_digits[2] = (NumericDigit) (newdig % NBASE);
+				carry = newdig / NBASE;
+				newdig = (int) var1digits[0] * var2digits[0] + carry;
+				res_digits[1] = (NumericDigit) (newdig % NBASE);
+				res_digits[0] = (NumericDigit) (newdig / NBASE);
+				break;
+		}
+
+		/* Store the product in result (minus extra rounding digit) */
+		digitbuf_free(result->buf);
+		result->ndigits = res_ndigits - 1;
+		result->buf = res_buf;
+		result->digits = res_digits;
+		result->weight = res_weight - 1;
+		result->sign = res_sign;
+
+		/* Round to target rscale (and set result->dscale) */
+		round_var(result, rscale);
+
+		/* Strip leading and trailing zeroes */
+		strip_var(result);
+
+		return;
+	}
+
 	/*
 	 * We do the arithmetic in an array "dig[]" of signed int's.  Since
 	 * INT_MAX is noticeably larger than NBASE*NBASE, this gives us headroom
-- 
2.45.1

