Re: [HACKERS] Abbreviated keys for Numeric

Andrew Gierth Sun, 22 Mar 2015 13:02:12 -0700

So here's a version 3 patch:

1. #ifdefism is reduced to a minimum by (a) desupporting values of NBASE
other than 10000, and (b) keeping the 32bit code, so that there is now
no longer any question of abbreviation not being used (it always is).
So the only #ifs in the code body (rather than declarations) are to
select which implementation of numeric_abbrev_convert_var to use.


2. Some (but not all) stylistic changes have been made following Peter's
version.

3. Buffer management has been simplified by simply allocating the
maximum needed size upfront (since it's only needed for short varlenas).
The truncation behavior has been removed.

4. Updated oid choice etc.

The substance of the code is unchanged from my original patch.  I didn't
add diagnostic output to numeric_abbrev_abort, see my separate post
about the suggestion of a GUC for that.

-- 
Andrew (irc:RhodiumToad)

diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index ff9bfcc..a27de6b 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -29,6 +29,7 @@
 #include "access/hash.h"
 #include "catalog/pg_type.h"
 #include "funcapi.h"
+#include "lib/hyperloglog.h"
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
@@ -36,6 +37,21 @@
 #include "utils/builtins.h"
 #include "utils/int8.h"
 #include "utils/numeric.h"
+#include "utils/sortsupport.h"
+
+/* XXX these should be in c.h - remove when that patch goes in */
+#ifndef INT64_MIN
+#define INT64_MIN	(-INT64CONST(0x7FFFFFFFFFFFFFFF) - 1)
+#endif
+#ifndef INT64_MAX
+#define INT64_MAX	INT64CONST(0x7FFFFFFFFFFFFFFF)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN	(-0x7FFFFFFF - 1)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX	(0x7FFFFFFF)
+#endif
 
 /* ----------
  * Uncomment the following to enable compilation of dump_numeric()
@@ -57,6 +73,12 @@
  * are easy.  Also, it's actually more efficient if NBASE is rather less than
  * sqrt(INT_MAX), so that there is "headroom" for mul_var and div_var_fast to
  * postpone processing carries.
+ *
+ * Values of NBASE other than 10000 are considered of historical interest only
+ * and are no longer supported in any sense; no mechanism exists for the client
+ * to discover the base, so every client supporting binary mode expects the
+ * base-10000 format.  If you plan to change this, also note the numeric
+ * abbreviation code, which assumes NBASE=10000.
  * ----------
  */
 
@@ -90,6 +112,10 @@ typedef signed char NumericDigit;
 typedef int16 NumericDigit;
 #endif
 
+#if DEC_DIGITS != 4
+#error "Numeric bases other than 10000 are no longer supported"
+#endif
+
 /*
  * The Numeric type as stored on disk.
  *
@@ -275,6 +301,30 @@ typedef struct
 
 
 /* ----------
+ * Sort support.
+ * ----------
+ */
+typedef struct
+{
+	void			   *buf;			/* buffer for short varlenas */
+	int64				input_count;	/* number of non-null values seen */
+	bool				estimating;		/* true if estimating cardinality */
+
+	hyperLogLogState	abbr_card;		/* cardinality estimator */
+} NumericSortSupport;
+
+#ifdef USE_FLOAT8_BYVAL
+#define NUMERIC_ABBREV_BITS 64
+#define DatumGetNumericAbbrev(d) DatumGetInt64(d)
+#define NUMERIC_ABBREV_NAN Int64GetDatum(INT64_MIN)
+#else
+#define NUMERIC_ABBREV_BITS 32
+#define DatumGetNumericAbbrev(d) DatumGetInt32(d)
+#define NUMERIC_ABBREV_NAN Int32GetDatum(INT32_MIN)
+#endif
+
+
+/* ----------
  * Some preinitialized constants
  * ----------
  */
@@ -409,6 +459,13 @@ static void int128_to_numericvar(int128 val, NumericVar *var);
 static double numeric_to_double_no_overflow(Numeric num);
 static double numericvar_to_double_no_overflow(NumericVar *var);
 
+static Datum numeric_abbrev_convert(Datum original_datum, SortSupport ssup);
+static bool  numeric_abbrev_abort(int memtupcount, SortSupport ssup);
+static int   numeric_fast_cmp(Datum x, Datum y, SortSupport ssup);
+static int   numeric_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
+
+static Datum numeric_abbrev_convert_var(NumericVar *var, NumericSortSupport *nss);
+
 static int	cmp_numerics(Numeric num1, Numeric num2);
 static int	cmp_var(NumericVar *var1, NumericVar *var2);
 static int cmp_var_common(const NumericDigit *var1digits, int var1ndigits,
@@ -1507,9 +1564,393 @@ compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
  * Note: btree indexes need these routines not to leak memory; therefore,
  * be careful to free working copies of toasted datums.  Most places don't
  * need to be so careful.
+ *
+ * Sort support:
+ *
+ * We implement the sortsupport strategy routine in order to get the benefit of
+ * abbreviation. The ordinary numeric comparison can be quite slow as a result
+ * of palloc/pfree cycles (due to detoasting packed values for alignment);
+ * while this could be worked on itself, the abbreviation strategy gives more
+ * speedup in many common cases.
+ *
+ * Two different representations are used for the abbreviated form, one in
+ * int32 and one in int64, whichever fits into a by-value Datum.  In both cases
+ * the representation is negated relative to the original value, because we use
+ * the largest negative value for NaN, which sorts higher than other values. We
+ * convert the absolute value of the numeric to a 31-bit or 63-bit positive
+ * value, and then negate it if the original number was positive.
+ *
+ * We abort the abbreviation process if the abbreviation cardinality is below
+ * 0.01% of the row count (1 per 10k non-null rows).  The actual break-even
+ * point is somewhat below that, perhaps 1 per 30k (at 1 per 100k there's a
+ * very small penalty), but we don't want to build up too many abbreviated
+ * values before first testing for abort, so we take the slightly pessimistic
+ * number.  We make no attempt to estimate the cardinality of the real values,
+ * since it plays no part in the cost model here (if the abbreviation is equal,
+ * the cost of comparing equal and unequal underlying values is comparable).
+ * We discontinue even checking for abort (saving us the hashing overhead) if
+ * the estimated cardinality gets to 100k; that would be enough to support many
+ * billions of rows while doing no worse than breaking even.
+ *
  * ----------------------------------------------------------------------
  */
 
+/*
+ * Sort support strategy routine.
+ */
+Datum
+numeric_sortsupport(PG_FUNCTION_ARGS)
+{
+	SortSupport		ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+	ssup->comparator = numeric_fast_cmp;
+
+	if (ssup->abbreviate)
+	{
+		NumericSortSupport *nss;
+		MemoryContext	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+		nss = palloc(sizeof(NumericSortSupport));
+
+		/*
+		 * palloc a buffer for handling unaligned packed values in addition to
+		 * the support struct
+		 */
+		nss->buf = palloc(VARATT_SHORT_MAX + VARHDRSZ + 1);
+
+		nss->input_count = 0;
+		nss->estimating = true;
+		initHyperLogLog(&nss->abbr_card, 10);
+
+		ssup->ssup_extra = nss;
+
+		ssup->abbrev_full_comparator = ssup->comparator;
+		ssup->comparator = numeric_cmp_abbrev;
+		ssup->abbrev_converter = numeric_abbrev_convert;
+		ssup->abbrev_abort = numeric_abbrev_abort;
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * Abbreviate a numeric datum, handling NaNs and detoasting
+ * (must not leak memory!)
+ */
+static Datum
+numeric_abbrev_convert(Datum original_datum, SortSupport ssup)
+{
+	NumericSortSupport	*nss = ssup->ssup_extra;
+	void	   *original_varatt = PG_DETOAST_DATUM_PACKED(original_datum);
+	Numeric		value;
+	Datum		result;
+
+	nss->input_count += 1;
+
+	/*
+	 * This is to handle packed datums without needing a palloc/pfree cycle; we
+	 * keep and reuse a buffer large enough to handle any short datum.
+	 */
+
+	if (VARATT_IS_SHORT(original_varatt))
+	{
+		void	   *buf = nss->buf;
+		Size		sz = VARSIZE_SHORT(original_varatt) - VARHDRSZ_SHORT;
+
+		Assert(sz <= VARATT_SHORT_MAX - VARHDRSZ_SHORT);
+
+		SET_VARSIZE(buf, VARHDRSZ + sz);
+		memcpy(VARDATA(buf), VARDATA_SHORT(original_varatt), sz);
+
+		value = (Numeric) buf;
+	}
+	else
+		value = (Numeric) original_varatt;
+
+	if (NUMERIC_IS_NAN(value))
+	{
+		result = NUMERIC_ABBREV_NAN;
+	}
+	else
+	{
+		NumericVar	var;
+
+		init_var_from_num(value, &var);
+
+		result = numeric_abbrev_convert_var(&var, nss);
+	}
+
+	/* should happen only for external toasts */
+	if ((Pointer) original_varatt != DatumGetPointer(original_datum))
+		pfree(original_varatt);
+
+	return result;
+}
+
+/*
+ * Consider whether to abort abbreviation.
+ *
+ * We pay no attention to the cardinality of the non-abbreviated data. There is
+ * no reason to do so: unlike text, we have no fast check for equal values, so
+ * we pay the full overhead whenever the abbreviations are equal regardless of
+ * whether the underlying values are also equal.
+ */
+static bool
+numeric_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+	NumericSortSupport	*nss = ssup->ssup_extra;
+	double abbr_card;
+
+	if (memtupcount < 10000 || nss->input_count < 10000 || !nss->estimating)
+		return false;
+
+	abbr_card = estimateHyperLogLog(&nss->abbr_card);
+
+	/*
+	 * if we have >100k distinct values, then even if we were sorting many
+	 * billion rows we'd likely still break even, and the penalty of undoing
+	 * that many rows of abbrevs would probably not be worth it. Stop even
+	 * counting at that point.
+	 */
+	if (abbr_card > 100000.0)
+	{
+		nss->estimating = false;
+		return false;
+	}
+
+	/*
+	 * target minimum cardinality is 1 per ~10k of non-null inputs.  (The break
+	 * even point is somewhere between one per 100k rows, where abbreviation
+	 * has a very slight penalty, and 1 per 10k where it wins by a measurable
+	 * percentage.)  We use the relatively pessimistic 10k threshold, and add a
+	 * 0.5 row fudge factor, because it allows us to abort earlier on genuinely
+	 * pathological data where we've had exactly one abbreviated value in the
+	 * first 10k (non-null) rows.
+	 */
+	if (abbr_card < nss->input_count / 10000.0 + 0.5)
+		return true;
+
+	return false;
+}
+
+/*
+ * non-fmgr interface to the comparison routine to allow sortsupport to elide
+ * the fmgr call.  The saving here is small given how slow numeric
+ * comparisons are, but there's no reason not to implement it.
+ */
+static int
+numeric_fast_cmp(Datum x, Datum y, SortSupport ssup)
+{
+	Numeric		nx = DatumGetNumeric(x);
+	Numeric		ny = DatumGetNumeric(y);
+	int			result;
+
+	(void) ssup;
+
+	result = cmp_numerics(nx, ny);
+
+	if ((Pointer) nx != DatumGetPointer(x))
+		pfree(nx);
+	if ((Pointer) ny != DatumGetPointer(y))
+		pfree(ny);
+
+	return result;
+}
+
+/*
+ * Compare abbreviations of values. (Abbreviations may be equal where the true
+ * values differ, but if the abbreviations differ, they must reflect the
+ * ordering of the true values.)
+ */
+static int
+numeric_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
+{
+	(void) ssup;
+
+	/*
+	 * NOTE WELL: this is intentionally backwards, because the abbreviation is
+	 * negated relative to the original value, to handle NaN.
+	 */
+	if (DatumGetNumericAbbrev(x) < DatumGetNumericAbbrev(y))
+		return 1;
+	if (DatumGetNumericAbbrev(x) > DatumGetNumericAbbrev(y))
+		return -1;
+	return 0;
+}
+
+/*
+ * Abbreviate a NumericVar according to the available bit size.
+ *
+ * The 31-bit value is constructed as:
+ *
+ *  0 + 7bits digit weight + 24 bits digit value
+ *
+ * where the digit weight is in single decimal digits, not digit words, and
+ * stored in excess-44 representation. The 24-bit digit value is the 7 most
+ * significant decimal digits of the value converted to binary. Values whose
+ * weights would fall outside the representable range are rounded off to zero
+ * (which is also used to represent actual zeros) or to 0x7FFFFFFF (which
+ * otherwise cannot occur). Abbreviation therefore fails to gain any advantage
+ * where values are outside the range 10^-44 to 10^83, which is not considered
+ * to be a serious limitation, or when values are of the same magnitude and
+ * equal in the first 7 decimal digits, which is considered to be an
+ * unavoidable limitation given the available bits. (Stealing three more bits
+ * to compare another digit would narrow the range of representable weights by
+ * a factor of 8, which starts to look like a real limiting factor.)
+ *
+ * (The value 44 for the excess is essentially arbitrary)
+ *
+ * The 63-bit value is constructed as:
+ *
+ *  0 + 7bits weight + 4 x 14-bit packed digit words
+ *
+ * The weight in this case is again stored in excess-44, but this time it is
+ * the original weight in digit words (i.e. powers of 10000). The first four
+ * digit words of the value (if present; trailing zeros are assumed as needed)
+ * are packed into 14 bits each to form the rest of the value. Again,
+ * out-of-range values are rounded off to 0 or 0x7FFFFFFFFFFFFFFF. The
+ * representable range in this case is 10^-176 to 10^332, which is considered
+ * to be good enough for all practical purposes, and comparison of 4 words
+ * means that at least 13 decimal digits are compared, which is considered to
+ * be a reasonable compromise between effectiveness and efficiency in computing
+ * the abbreviation.
+ *
+ * (The value 44 for the excess is even more arbitrary here, it was chosen just
+ * to match the value used in the 31-bit case)
+ */
+
+#if NUMERIC_ABBREV_BITS == 64
+
+static Datum
+numeric_abbrev_convert_var(NumericVar *var, NumericSortSupport *nss)
+{
+	int		ndigits = var->ndigits;
+	int		weight = var->weight;
+	int64	result;
+
+	if (ndigits == 0 || weight < -44)
+	{
+		result = 0;
+	}
+	else if (weight > 83)
+	{
+		result = INT64_MAX;
+	}
+	else
+	{
+		result = ((int64)(weight + 44) << 56);
+
+		switch (ndigits)
+		{
+			default:
+				result |= ((int64) var->digits[3]);
+				/* FALLTHROUGH */
+			case 3:
+				result |= ((int64) var->digits[2]) << 14;
+				/* FALLTHROUGH */
+			case 2:
+				result |= ((int64) var->digits[1]) << 28;
+				/* FALLTHROUGH */
+			case 1:
+				result |= ((int64) var->digits[0]) << 42;
+				break;
+		}
+	}
+
+	/* the abbrev is negated relative to the original */
+	if (var->sign == NUMERIC_POS)
+		result = -result;
+
+	if (nss->estimating)
+	{
+		uint32 tmp = ((uint32)result) ^ (uint32)((uint64) result >> 32);
+		addHyperLogLog(&nss->abbr_card, hash_uint32(tmp));
+	}
+
+	return Int64GetDatum(result);
+}
+
+#endif /* NUMERIC_ABBREV_BITS == 64 */
+
+#if NUMERIC_ABBREV_BITS == 32
+
+static Datum
+numeric_abbrev_convert_var(NumericVar *var, NumericSortSupport *nss)
+{
+	int		ndigits = var->ndigits;
+	int		weight = var->weight;
+	int32	result;
+
+	if (ndigits == 0 || weight < -11)
+	{
+		result = 0;
+	}
+	else if (weight > 20)
+	{
+		result = INT32_MAX;
+	}
+	else
+	{
+		NumericDigit	nxt1 = (ndigits > 1) ? var->digits[1] : 0;
+
+		weight = (weight + 11) * 4;
+
+		result = var->digits[0];
+
+		/*
+		 * "result" now has 1 to 4 nonzero decimal digits. We pack in more
+		 * digits to make 7 in total (largest we can fit in 24 bits)
+		 */
+
+		if (result > 999)
+		{
+			/* already have 4 digits, add 3 more */
+			result = (result * 1000) + (nxt1 / 10);
+			weight += 3;
+		}
+		else if (result > 99)
+		{
+			/* already have 3 digits, add 4 more */
+			result = (result * 10000) + nxt1;
+			weight += 2;
+		}
+		else if (result > 9)
+		{
+			NumericDigit	nxt2 = (ndigits > 2) ? var->digits[2] : 0;
+			/* already have 2 digits, add 5 more */
+			result = (result * 100000) + (nxt1 * 10) + (nxt2 / 1000);
+			weight += 1;
+		}
+		else
+		{
+			NumericDigit	nxt2 = (ndigits > 2) ? var->digits[2] : 0;
+			/* already have 1 digit, add 6 more */
+			result = (result * 1000000) + (nxt1 * 100) + (nxt2 / 100);
+		}
+
+		result = result | (weight << 24);
+	}
+
+	/* the abbrev is negated relative to the original */
+	if (var->sign == NUMERIC_POS)
+		result = -result;
+
+	if (nss->estimating)
+	{
+		uint32 tmp = (uint32)result;
+		addHyperLogLog(&nss->abbr_card, hash_uint32(tmp));
+	}
+
+	return Int32GetDatum(result);
+}
+
+#endif /* NUMERIC_ABBREV_BITS == 32 */
+
+/*
+ * Ordinary (non-sortsupport) comparisons follow.
+ */
 
 Datum
 numeric_cmp(PG_FUNCTION_ARGS)
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index 49d3d13..2461593 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -118,6 +118,7 @@ DATA(insert (	1984   829 829 1 836 ));
 DATA(insert (	1986   19 19 1 359 ));
 DATA(insert (	1986   19 19 2 3135 ));
 DATA(insert (	1988   1700 1700 1 1769 ));
+DATA(insert (	1988   1700 1700 2 3280 ));
 DATA(insert (	1989   26 26 1 356 ));
 DATA(insert (	1989   26 26 2 3134 ));
 DATA(insert (	1991   30 30 1 404 ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 3c218a3..a5fcda9 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2366,6 +2366,8 @@ DATA(insert OID = 1767 ( numeric_larger			PGNSP PGUID 12 1 0 0 0 f f f f t f i 2
 DESCR("larger of two");
 DATA(insert OID = 1769 ( numeric_cmp			PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 23 "1700 1700" _null_ _null_ _null_ _null_ numeric_cmp _null_ _null_ _null_ ));
 DESCR("less-equal-greater");
+DATA(insert OID = 3280 ( numeric_sortsupport	PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2278 "2281" _null_ _null_ _null_ _null_ numeric_sortsupport _null_ _null_ _null_ ));
+DESCR("sort support");
 DATA(insert OID = 1771 ( numeric_uminus			PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 1700 "1700" _null_ _null_ _null_ _null_ numeric_uminus _null_ _null_ _null_ ));
 DATA(insert OID = 1779 ( int8					PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 20 "1700" _null_ _null_ _null_ _null_ numeric_int8 _null_ _null_ _null_ ));
 DESCR("convert numeric to int8");
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 6310641..33a453f 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -978,6 +978,7 @@ extern Datum numeric_round(PG_FUNCTION_ARGS);
 extern Datum numeric_trunc(PG_FUNCTION_ARGS);
 extern Datum numeric_ceil(PG_FUNCTION_ARGS);
 extern Datum numeric_floor(PG_FUNCTION_ARGS);
+extern Datum numeric_sortsupport(PG_FUNCTION_ARGS);
 extern Datum numeric_cmp(PG_FUNCTION_ARGS);
 extern Datum numeric_eq(PG_FUNCTION_ARGS);
 extern Datum numeric_ne(PG_FUNCTION_ARGS);

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Abbreviated keys for Numeric

Reply via email to