On Tue, 17 Jun 2025 10:43:41 -0400
Andres Freund <and...@anarazel.de> wrote:

> Hi,
> 
> On 2025-04-22 18:10:06 +0900, Yugo Nagata wrote:
> > With your feedback, I would like to progress or rework the patch.
> 
> Right now the tests seem to always fail:
> https://cirrus-ci.com/github/postgresql-cfbot/postgresql/cf%2F571

Thank you for letting me know it.

I've attached an updated patch to fix the test failure.

However, I'm now reconsidering the current approach, where the expression
of a virtual generated column is expanded at the time of creating extended
statistics. This seems not be ideal, as the statistics would become useless
if the expression is later modified.

Instead, I'm thinking of an alternative approach: expanding the expression
at the time statistics are collected.

Best regards,
Yugo Nagata

> 
> Fails e.g. with:
> https://api.cirrus-ci.com/v1/artifact/task/5921189782093824/testrun/build/testrun/regress/regress/regression.diffs
> 
> diff -U3 /tmp/cirrus-ci-build/src/test/regress/expected/stats_ext.out 
> /tmp/cirrus-ci-build/build/testrun/regress/regress/results/stats_ext.out
> --- /tmp/cirrus-ci-build/src/test/regress/expected/stats_ext.out      
> 2025-05-26 00:59:01.813042000 +0000
> +++ /tmp/cirrus-ci-build/build/testrun/regress/regress/results/stats_ext.out  
> 2025-05-26 01:02:20.350387000 +0000
> @@ -56,7 +56,6 @@
>  ERROR:  unrecognized statistics kind "unrecognized"
>  -- incorrect expressions
>  CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference
> -ERROR:  extended statistics require at least 2 columns
>  CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
>  ERROR:  syntax error at or near "+"
>  LINE 1: CREATE STATISTICS tst ON y + z FROM ext_stats_test;
> @@ -69,25 +68,24 @@
>  -- statistics on virtual generated column not allowed
>  CREATE TABLE ext_stats_test1 (x int, y int, z int GENERATED ALWAYS AS (x+y) 
> VIRTUAL, w xid);
>  CREATE STATISTICS tst on z from ext_stats_test1;
> -ERROR:  statistics creation on virtual generated columns is not supported
>  CREATE STATISTICS tst on (z) from ext_stats_test1;
> -ERROR:  statistics creation on virtual generated columns is not supported
> +ERROR:  statistics object "tst" already exists
>  CREATE STATISTICS tst on (z+1) from ext_stats_test1;
> -ERROR:  statistics creation on virtual generated columns is not supported
> +ERROR:  statistics object "tst" already exists
>  CREATE STATISTICS tst (ndistinct) ON z from ext_stats_test1;
> -ERROR:  statistics creation on virtual generated columns is not supported
> +ERROR:  statistics object "tst" already exists
>  -- statistics on system column not allowed
>  CREATE STATISTICS tst on tableoid from ext_stats_test1;
> -ERROR:  statistics creation on system columns is not supported
> +ERROR:  statistics object "tst" already exists
>  CREATE STATISTICS tst on (tableoid) from ext_stats_test1;
> -ERROR:  statistics creation on system columns is not supported
> +ERROR:  statistics object "tst" already exists
>  CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test1;
> -ERROR:  statistics creation on system columns is not supported
> +ERROR:  statistics object "tst" already exists
>  CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test1;
> -ERROR:  statistics creation on system columns is not supported
> +ERROR:  statistics object "tst" already exists
>  -- statistics without a less-than operator not supported
>  CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test1;
> -ERROR:  column "w" cannot be used in statistics because its type xid has no 
> default btree operator class
> +ERROR:  statistics object "tst" already exists
>  DROP TABLE ext_stats_test1;
>  -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
>  CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
> 
> Greetings,
> 
> Andres
> 
> 


-- 
Yugo Nagata <nag...@sraoss.co.jp>
>From f6151679d4b222bd5c60107322486ea90fa951d2 Mon Sep 17 00:00:00 2001
From: Yugo Nagata <nag...@sraoss.co.jp>
Date: Tue, 22 Apr 2025 17:03:50 +0900
Subject: [PATCH v2] Allow to create extended statistics on virtual generated
 columns

---
 src/backend/commands/statscmds.c        | 88 ++++++++++++-------------
 src/test/regress/expected/stats_ext.out | 11 +---
 src/test/regress/sql/stats_ext.sql      |  7 +-
 3 files changed, 43 insertions(+), 63 deletions(-)

diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index e24d540cd45..1875ea26879 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -29,6 +29,7 @@
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/optimizer.h"
+#include "rewrite/rewriteHandler.h"
 #include "statistics/statistics.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
@@ -240,59 +241,56 @@ CreateStatistics(CreateStatsStmt *stmt)
 								attname)));
 			attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
 
-			/* Disallow use of system attributes in extended stats */
-			if (attForm->attnum <= 0)
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("statistics creation on system columns is not supported")));
-
-			/* Disallow use of virtual generated columns in extended stats */
 			if (attForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("statistics creation on virtual generated columns is not supported")));
+				selem->expr = build_generation_expression(rel, attForm->attnum);
+			else
+			{
+				/* Disallow use of system attributes in extended stats */
+				if (attForm->attnum <= 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("statistics creation on system columns is not supported")));
 
-			/* Disallow data types without a less-than operator */
-			type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
-			if (type->lt_opr == InvalidOid)
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
-								attname, format_type_be(attForm->atttypid))));
+				/* Disallow data types without a less-than operator */
+				type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
+				if (type->lt_opr == InvalidOid)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
+									attname, format_type_be(attForm->atttypid))));
 
-			attnums[nattnums] = attForm->attnum;
-			nattnums++;
+				attnums[nattnums] = attForm->attnum;
+				nattnums++;
+			}
 			ReleaseSysCache(atttuple);
 		}
-		else if (IsA(selem->expr, Var)) /* column reference in parens */
+
+		if (selem->expr && IsA(selem->expr, Var)) /* column reference in parens */
 		{
 			Var		   *var = (Var *) selem->expr;
 			TypeCacheEntry *type;
 
-			/* Disallow use of system attributes in extended stats */
-			if (var->varattno <= 0)
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("statistics creation on system columns is not supported")));
-
-			/* Disallow use of virtual generated columns in extended stats */
-			if (get_attgenerated(relid, var->varattno) == ATTRIBUTE_GENERATED_VIRTUAL)
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("statistics creation on virtual generated columns is not supported")));
+			if (get_attgenerated(relid, var->varattno) != ATTRIBUTE_GENERATED_VIRTUAL)
+			{
+				/* Disallow use of system attributes in extended stats */
+				if (var->varattno <= 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("statistics creation on system columns is not supported")));
 
-			/* Disallow data types without a less-than operator */
-			type = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR);
-			if (type->lt_opr == InvalidOid)
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
-								get_attname(relid, var->varattno, false), format_type_be(var->vartype))));
+				/* Disallow data types without a less-than operator */
+				type = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR);
+				if (type->lt_opr == InvalidOid)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
+									get_attname(relid, var->varattno, false), format_type_be(var->vartype))));
 
-			attnums[nattnums] = var->varattno;
-			nattnums++;
+				attnums[nattnums] = var->varattno;
+				nattnums++;
+			}
 		}
-		else					/* expression */
+		else if (selem->expr)	/* expression */
 		{
 			Node	   *expr = selem->expr;
 			Oid			atttype;
@@ -302,6 +300,8 @@ CreateStatistics(CreateStatsStmt *stmt)
 
 			Assert(expr != NULL);
 
+			expr = expand_generated_columns_in_expr(expr, rel, 1);
+
 			pull_varattnos(expr, 1, &attnums);
 
 			k = -1;
@@ -314,12 +314,6 @@ CreateStatistics(CreateStatsStmt *stmt)
 					ereport(ERROR,
 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 							 errmsg("statistics creation on system columns is not supported")));
-
-				/* Disallow use of virtual generated columns in extended stats */
-				if (get_attgenerated(relid, attnum) == ATTRIBUTE_GENERATED_VIRTUAL)
-					ereport(ERROR,
-							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							 errmsg("statistics creation on virtual generated columns is not supported")));
 			}
 
 			/*
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 6359e5fb689..0c29f060a52 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -66,17 +66,8 @@ ERROR:  syntax error at or near ","
 LINE 1: CREATE STATISTICS tst ON (x, y) FROM ext_stats_test;
                                    ^
 DROP TABLE ext_stats_test;
--- statistics on virtual generated column not allowed
-CREATE TABLE ext_stats_test1 (x int, y int, z int GENERATED ALWAYS AS (x+y) VIRTUAL, w xid);
-CREATE STATISTICS tst on z from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
-CREATE STATISTICS tst on (z) from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
-CREATE STATISTICS tst on (z+1) from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
-CREATE STATISTICS tst (ndistinct) ON z from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
 -- statistics on system column not allowed
+CREATE TABLE ext_stats_test1 (w xid);
 CREATE STATISTICS tst on tableoid from ext_stats_test1;
 ERROR:  statistics creation on system columns is not supported
 CREATE STATISTICS tst on (tableoid) from ext_stats_test1;
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index da4f2fe9c93..437d967e371 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -45,13 +45,8 @@ CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference
 CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
 CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression
 DROP TABLE ext_stats_test;
--- statistics on virtual generated column not allowed
-CREATE TABLE ext_stats_test1 (x int, y int, z int GENERATED ALWAYS AS (x+y) VIRTUAL, w xid);
-CREATE STATISTICS tst on z from ext_stats_test1;
-CREATE STATISTICS tst on (z) from ext_stats_test1;
-CREATE STATISTICS tst on (z+1) from ext_stats_test1;
-CREATE STATISTICS tst (ndistinct) ON z from ext_stats_test1;
 -- statistics on system column not allowed
+CREATE TABLE ext_stats_test1 (w xid);
 CREATE STATISTICS tst on tableoid from ext_stats_test1;
 CREATE STATISTICS tst on (tableoid) from ext_stats_test1;
 CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test1;
-- 
2.43.0

Reply via email to