Hi Egor,
While reviewing a patch improving join estimates for ranges [1] I
realized we don't show stats for ranges in pg_stats, and I recalled we
had this patch.
I rebased the v2, and I decided to took a stab at showing separate
histograms for lower/upper histogram bounds. I believe it makes it way
more readable, which is what pg_stats is about IMHO.
This simply adds two functions, accepting/producing anyarray - one for
lower bounds, one for upper bounds. I don't think it can be done with a
plain subquery (or at least I don't know how).
Finally, it renames the empty_range_frac to start with range_, per the
earlier discussion. I wonder if the new column names for lower/upper
bounds (range_lower_bounds_histograms/range_upper_bounds_histograms) are
too long ...
regards
[1] https://commitfest.postgresql.org/41/3821/
--
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
From b339c0eab5616cec61e9d9e85398034861608d30 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Fri, 20 Jan 2023 20:50:41 +0100
Subject: [PATCH 1/3] Display length and bounds histograms in pg_stats
Values corresponding to STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM and
STATISTIC_KIND_BOUNDS_HISTOGRAM were not exposed to pg_stats when these
slot kinds were introduced in 918eee0c49.
This commit adds the missing fields to pg_stats.
TODO: catalog version bump
---
doc/src/sgml/catalogs.sgml | 32 ++++++++++++++++++++++++++++
src/backend/catalog/system_views.sql | 23 +++++++++++++++++++-
src/include/catalog/pg_statistic.h | 3 +++
src/test/regress/expected/rules.out | 26 +++++++++++++++++++++-
4 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index c1e4048054e..c8bd84c56eb 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -9634,6 +9634,38 @@ SCRAM-SHA-256$<replaceable><iteration count></replaceable>:<replaceable>&l
User mapping specific options, as <quote>keyword=value</quote> strings
</para></entry>
</row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>range_length_histogram</structfield> <type>anyarray</type>
+ </para>
+ <para>
+ A histogram of the lengths of non-empty and non-null range values of a
+ range type column. (Null for non-range types.)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>empty_range_frac</structfield> <type>float4</type>
+ </para>
+ <para>
+ Fraction of column entries whose values are empty ranges.
+ (Null for non-range types.)
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>range_bounds_histograms</structfield> <type>anyarray</type>
+ </para>
+ <para>
+ Histograms of lower and upper bounds of non-empty, non-null ranges,
+ combined into a single array of range values. The lower and upper bounds
+ of each value correspond to the histograms of lower and upper bounds
+ respectively. (Null for non-range types.)
+ </para></entry>
+ </row>
</tbody>
</tgroup>
</table>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 8608e3fa5b1..ccd6c7ffdb7 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -243,7 +243,28 @@ CREATE VIEW pg_stats WITH (security_barrier) AS
WHEN stakind3 = 5 THEN stanumbers3
WHEN stakind4 = 5 THEN stanumbers4
WHEN stakind5 = 5 THEN stanumbers5
- END AS elem_count_histogram
+ END AS elem_count_histogram,
+ CASE
+ WHEN stakind1 = 6 THEN stavalues1
+ WHEN stakind2 = 6 THEN stavalues2
+ WHEN stakind3 = 6 THEN stavalues3
+ WHEN stakind4 = 6 THEN stavalues4
+ WHEN stakind5 = 6 THEN stavalues5
+ END AS range_length_histogram,
+ CASE
+ WHEN stakind1 = 6 THEN stanumbers1[1]
+ WHEN stakind2 = 6 THEN stanumbers2[1]
+ WHEN stakind3 = 6 THEN stanumbers3[1]
+ WHEN stakind4 = 6 THEN stanumbers4[1]
+ WHEN stakind5 = 6 THEN stanumbers5[1]
+ END AS empty_range_frac,
+ CASE
+ WHEN stakind1 = 7 THEN stavalues1
+ WHEN stakind2 = 7 THEN stavalues2
+ WHEN stakind3 = 7 THEN stavalues3
+ WHEN stakind4 = 7 THEN stavalues4
+ WHEN stakind5 = 7 THEN stavalues5
+ END AS range_bounds_histograms
FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid)
JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum)
LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h
index 8770c5b4c60..10401dece0d 100644
--- a/src/include/catalog/pg_statistic.h
+++ b/src/include/catalog/pg_statistic.h
@@ -152,6 +152,9 @@ DECLARE_FOREIGN_KEY((starelid, staattnum), pg_attribute, (attrelid, attnum));
* data "kind" will appear in any particular slot. Instead, search the
* stakind fields to see if the desired data is available. (The standard
* function get_attstatsslot() may be used for this.)
+ *
+ * Note: The pg_stats view needs to be modified whenever a new slot kind is
+ * added to core.
*/
/*
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index e7a2f5856aa..ced5933271c 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2436,7 +2436,31 @@ pg_stats| SELECT n.nspname AS schemaname,
WHEN (s.stakind4 = 5) THEN s.stanumbers4
WHEN (s.stakind5 = 5) THEN s.stanumbers5
ELSE NULL::real[]
- END AS elem_count_histogram
+ END AS elem_count_histogram,
+ CASE
+ WHEN (s.stakind1 = 6) THEN s.stavalues1
+ WHEN (s.stakind2 = 6) THEN s.stavalues2
+ WHEN (s.stakind3 = 6) THEN s.stavalues3
+ WHEN (s.stakind4 = 6) THEN s.stavalues4
+ WHEN (s.stakind5 = 6) THEN s.stavalues5
+ ELSE NULL::anyarray
+ END AS range_length_histogram,
+ CASE
+ WHEN (s.stakind1 = 6) THEN s.stanumbers1[1]
+ WHEN (s.stakind2 = 6) THEN s.stanumbers2[1]
+ WHEN (s.stakind3 = 6) THEN s.stanumbers3[1]
+ WHEN (s.stakind4 = 6) THEN s.stanumbers4[1]
+ WHEN (s.stakind5 = 6) THEN s.stanumbers5[1]
+ ELSE NULL::real
+ END AS empty_range_frac,
+ CASE
+ WHEN (s.stakind1 = 7) THEN s.stavalues1
+ WHEN (s.stakind2 = 7) THEN s.stavalues2
+ WHEN (s.stakind3 = 7) THEN s.stavalues3
+ WHEN (s.stakind4 = 7) THEN s.stavalues4
+ WHEN (s.stakind5 = 7) THEN s.stavalues5
+ ELSE NULL::anyarray
+ END AS range_bounds_histograms
FROM (((pg_statistic s
JOIN pg_class c ON ((c.oid = s.starelid)))
JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum))))
--
2.39.0
From 9bd2bbef9dc092ab5fdace74e26f3afeaa62830d Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Fri, 20 Jan 2023 22:29:53 +0100
Subject: [PATCH 2/3] add functions to extract lower/upper ramge bounds
---
src/backend/catalog/system_views.sql | 19 ++-
src/backend/utils/adt/rangetypes_typanalyze.c | 118 ++++++++++++++++++
src/include/catalog/pg_proc.dat | 10 ++
3 files changed, 141 insertions(+), 6 deletions(-)
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index ccd6c7ffdb7..e3314550e32 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -259,12 +259,19 @@ CREATE VIEW pg_stats WITH (security_barrier) AS
WHEN stakind5 = 6 THEN stanumbers5[1]
END AS empty_range_frac,
CASE
- WHEN stakind1 = 7 THEN stavalues1
- WHEN stakind2 = 7 THEN stavalues2
- WHEN stakind3 = 7 THEN stavalues3
- WHEN stakind4 = 7 THEN stavalues4
- WHEN stakind5 = 7 THEN stavalues5
- END AS range_bounds_histograms
+ WHEN stakind1 = 7 THEN ranges_lower_bounds(stavalues1)
+ WHEN stakind2 = 7 THEN ranges_lower_bounds(stavalues2)
+ WHEN stakind3 = 7 THEN ranges_lower_bounds(stavalues3)
+ WHEN stakind4 = 7 THEN ranges_lower_bounds(stavalues4)
+ WHEN stakind5 = 7 THEN ranges_lower_bounds(stavalues5)
+ END AS range_lower_bounds_histograms,
+ CASE
+ WHEN stakind1 = 7 THEN ranges_upper_bounds(stavalues1)
+ WHEN stakind2 = 7 THEN ranges_upper_bounds(stavalues2)
+ WHEN stakind3 = 7 THEN ranges_upper_bounds(stavalues3)
+ WHEN stakind4 = 7 THEN ranges_upper_bounds(stavalues4)
+ WHEN stakind5 = 7 THEN ranges_upper_bounds(stavalues5)
+ END AS range_upper_bounds_histograms
FROM pg_statistic s JOIN pg_class c ON (c.oid = s.starelid)
JOIN pg_attribute a ON (c.oid = attrelid AND attnum = s.staattnum)
LEFT JOIN pg_namespace n ON (n.oid = c.relnamespace)
diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c
index 86810a1a6e6..a0097282fc0 100644
--- a/src/backend/utils/adt/rangetypes_typanalyze.c
+++ b/src/backend/utils/adt/rangetypes_typanalyze.c
@@ -26,6 +26,8 @@
#include "catalog/pg_operator.h"
#include "commands/vacuum.h"
+#include "utils/array.h"
+#include "utils/arrayaccess.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
@@ -427,3 +429,119 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
* hashtable should also go away, as it used a child memory context.
*/
}
+
+Datum
+ranges_lower_bounds(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *array = PG_GETARG_ANY_ARRAY_P(0);
+ int ndims = AARR_NDIM(array);
+ int *dims = AARR_DIMS(array);
+ Oid element_type = AARR_ELEMTYPE(array);
+ int i;
+ array_iter iter;
+ int nelems;
+ Datum *elems;
+ TypeCacheEntry *typentry;
+ TypeCacheEntry *typentry_element;
+
+ /* Get information about range type; note column might be a domain */
+ typentry = range_get_typcache(fcinfo, getBaseType(element_type));
+
+ if (typentry->typtype != TYPTYPE_RANGE)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("expected array of ranges")));
+
+ ndims = AARR_NDIM(array);
+ dims = AARR_DIMS(array);
+ nelems = ArrayGetNItems(ndims, dims);
+
+ elems = (Datum *) palloc(nelems * sizeof(Datum));
+
+ array_iter_setup(&iter, array);
+
+ for (i = 0; i < nelems; i++)
+ {
+ Datum itemvalue;
+ bool isnull;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+
+ /* Get source element, checking for NULL */
+ itemvalue = array_iter_next(&iter, &isnull, i,
+ typentry->typlen, typentry->typbyval,
+ typentry->typalign);
+
+ Assert(!isnull);
+
+ range_deserialize(typentry, (RangeType *) itemvalue, &lower, &upper, &empty);
+ elems[i] = lower.val;
+ }
+
+ typentry_element = typentry->rngelemtype;
+
+ PG_RETURN_ARRAYTYPE_P(construct_array(elems, nelems,
+ typentry_element->type_id,
+ typentry_element->typlen,
+ typentry_element->typbyval,
+ typentry_element->typalign));
+}
+
+Datum
+ranges_upper_bounds(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *array = PG_GETARG_ANY_ARRAY_P(0);
+ int ndims = AARR_NDIM(array);
+ int *dims = AARR_DIMS(array);
+ Oid element_type = AARR_ELEMTYPE(array);
+ int i;
+ array_iter iter;
+ int nelems;
+ Datum *elems;
+ TypeCacheEntry *typentry;
+ TypeCacheEntry *typentry_element;
+
+ /* Get information about range type; note column might be a domain */
+ typentry = range_get_typcache(fcinfo, getBaseType(element_type));
+
+ if (typentry->typtype != TYPTYPE_RANGE)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("expected array of ranges")));
+
+ ndims = AARR_NDIM(array);
+ dims = AARR_DIMS(array);
+ nelems = ArrayGetNItems(ndims, dims);
+
+ elems = (Datum *) palloc(nelems * sizeof(Datum));
+
+ array_iter_setup(&iter, array);
+
+ for (i = 0; i < nelems; i++)
+ {
+ Datum itemvalue;
+ bool isnull;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+
+ /* Get source element, checking for NULL */
+ itemvalue = array_iter_next(&iter, &isnull, i,
+ typentry->typlen, typentry->typbyval,
+ typentry->typalign);
+
+ Assert(!isnull);
+
+ range_deserialize(typentry, (RangeType *) itemvalue, &lower, &upper, &empty);
+ elems[i] = upper.val;
+ }
+
+ typentry_element = typentry->rngelemtype;
+
+ PG_RETURN_ARRAYTYPE_P(construct_array(elems, nelems,
+ typentry_element->type_id,
+ typentry_element->typlen,
+ typentry_element->typbyval,
+ typentry_element->typalign));
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 86eb8e8c58a..161557e4cb2 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -11891,4 +11891,14 @@
prorettype => 'bytea', proargtypes => 'pg_brin_minmax_multi_summary',
prosrc => 'brin_minmax_multi_summary_send' },
+{ oid => '9693', descr => 'lower bounds of ranges',
+ proname => 'ranges_lower_bounds', provolatile => 's',
+ prorettype => 'anyarray', proargtypes => 'anyarray',
+ prosrc => 'ranges_lower_bounds' },
+
+{ oid => '9694', descr => 'upper bounds of ranges',
+ proname => 'ranges_upper_bounds', provolatile => 's',
+ prorettype => 'anyarray', proargtypes => 'anyarray',
+ prosrc => 'ranges_upper_bounds' },
+
]
--
2.39.0
From 600fed1faae369b8291ec3a7fc64f6ad8099c124 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Fri, 20 Jan 2023 22:40:28 +0100
Subject: [PATCH 3/3] rename empty_range_frac
---
src/backend/catalog/system_views.sql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index e3314550e32..ac99d41f808 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -257,7 +257,7 @@ CREATE VIEW pg_stats WITH (security_barrier) AS
WHEN stakind3 = 6 THEN stanumbers3[1]
WHEN stakind4 = 6 THEN stanumbers4[1]
WHEN stakind5 = 6 THEN stanumbers5[1]
- END AS empty_range_frac,
+ END AS range_empty_frac,
CASE
WHEN stakind1 = 7 THEN ranges_lower_bounds(stavalues1)
WHEN stakind2 = 7 THEN ranges_lower_bounds(stavalues2)
--
2.39.0