From 6adfaccef2a6594e69c4a599b77a2f825f58faf6 Mon Sep 17 00:00:00 2001
From: hari krishna <hmaddileti@vmware.com>
Date: Thu, 26 Jan 2023 15:06:25 +0530
Subject: [PATCH v2] Implement input functions for extended statistics types

In order restore a dumped extended statistics (stxdndistinct, stxddependencies, stxdmcv) we need to provide input functions to parse pg_distinct/pg_dependency/pg_mcv_list strings.

Today we get the ERROR "cannot accept a value of type pg_ndistinct/pg_dependencies/pg_mcv_list" when we try to do an insert of any type.

Approach :
- Using yacc grammar file (statistics_gram.y) to parse the input string to its internal format for the types pg_distinct and pg_dependencies
- We are just calling byteain() for serialized input text of type pg_mcv_list.
- Add regress testcases for intput  functions pg_dependencies_in and pg_ndistinct_in

Usecase:
        - Helps for reproducing complex customer issues locally
        - dump and restore using pg_upgrade and pg_restore

Co-authored-by: David Kimura dkimura@vmware.com
---
 src/backend/Makefile                          |   4 +
 src/backend/statistics/.gitignore             |   3 +
 src/backend/statistics/Makefile               |  16 +-
 src/backend/statistics/dependencies.c         |  28 +-
 src/backend/statistics/mcv.c                  |  14 +-
 src/backend/statistics/meson.build            |  25 ++
 src/backend/statistics/mvdistinct.c           |  25 +-
 src/backend/statistics/statistics_gram.y      | 173 +++++++++++
 src/backend/statistics/statistics_scanner.l   |  88 ++++++
 .../statistics/extended_stats_internal.h      |  13 +
 src/include/statistics/statistics.h           |   3 +
 src/test/regress/expected/stats_ext.out       | 280 ++++++++++++++++++
 src/test/regress/sql/stats_ext.sql            |  99 +++++++
 src/tools/msvc/Mkvcbuild.pm                   |   3 +
 src/tools/msvc/clean.bat                      |   3 +-
 15 files changed, 746 insertions(+), 31 deletions(-)
 create mode 100644 src/backend/statistics/.gitignore
 create mode 100644 src/backend/statistics/statistics_gram.y
 create mode 100644 src/backend/statistics/statistics_scanner.l

diff --git a/src/backend/Makefile b/src/backend/Makefile
index 3d851f757f..b2e63529c7 100644
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -180,6 +180,7 @@ distprep:
 	$(MAKE) -C bootstrap	bootparse.c bootparse.h bootscanner.c
 	$(MAKE) -C catalog	distprep
 	$(MAKE) -C nodes	distprep
+	$(MAKE) -C statistics statistics_gram.c statistics_scanner.c
 	$(MAKE) -C replication	repl_gram.c repl_gram.h repl_scanner.c syncrep_gram.c syncrep_gram.h syncrep_scanner.c
 	$(MAKE) -C storage/lmgr	lwlocknames.h lwlocknames.c
 	$(MAKE) -C utils	distprep
@@ -300,6 +301,9 @@ maintainer-clean: distclean
 	      parser/gram.c \
 	      parser/gram.h \
 	      parser/scan.c \
+	      statistics/statistics_gram.c \
+	      statistics/statistics_scanner.c \
+	      statistics/statistics_gram.h \
 	      replication/repl_gram.c \
 	      replication/repl_gram.h \
 	      replication/repl_scanner.c \
diff --git a/src/backend/statistics/.gitignore b/src/backend/statistics/.gitignore
new file mode 100644
index 0000000000..ef88ec4b8f
--- /dev/null
+++ b/src/backend/statistics/.gitignore
@@ -0,0 +1,3 @@
+/statistics_gram.c
+/statistics_scanner.c
+/statistics_gram.h
diff --git a/src/backend/statistics/Makefile b/src/backend/statistics/Makefile
index 89cf8c2797..5b99c7fd59 100644
--- a/src/backend/statistics/Makefile
+++ b/src/backend/statistics/Makefile
@@ -16,6 +16,20 @@ OBJS = \
 	dependencies.o \
 	extended_stats.o \
 	mcv.o \
-	mvdistinct.o
+	mvdistinct.o \
+	statistics_gram.o \
+	statistics_scanner.o
 
 include $(top_srcdir)/src/backend/common.mk
+
+# See notes in src/backend/parser/Makefile about the following two rules
+statistics_gram.h: statistics_gram.c
+	touch $@
+
+statistics_gram.c: BISONFLAGS += -d
+
+# Force these dependencies to be known even without dependency info built:
+statistics_gram.o statistics_scanner.o: statistics_gram.h
+
+# statistics_gram.c, statistics_scanner are not cleaned here.
+# (Our parent Makefile takes care of them during maintainer-clean.)
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index e6e2835345..583a866e09 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -88,6 +88,7 @@ static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clause
 												 int ndependencies,
 												 AttrNumber *list_attnums,
 												 Bitmapset **estimatedclauses);
+extern void statistic_scanner_init(const char *query_string);
 
 static void
 generate_dependencies_recurse(DependencyGenerator state, int index,
@@ -648,21 +649,26 @@ statext_dependencies_load(Oid mvoid, bool inh)
 /*
  * pg_dependencies_in		- input routine for type pg_dependencies.
  *
- * pg_dependencies is real enough to be a table column, but it has no operations
- * of its own, and disallows input too
+ * converts the dependencies from the external format in "string" to its
+ * internal format.
  */
 Datum
 pg_dependencies_in(PG_FUNCTION_ARGS)
 {
-	/*
-	 * pg_node_list stores the data in binary form and parsing text input is
-	 * not needed, so disallow this.
-	 */
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("cannot accept a value of type %s", "pg_dependencies")));
-
-	PG_RETURN_VOID();			/* keep compiler quiet */
+	char	   *str = PG_GETARG_CSTRING(0);
+	MVDependencies *mvdependencies;
+	int			parse_rc;
+
+	statistic_scanner_init(str);
+	parse_rc = statistic_yyparse();
+	if (parse_rc != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("failed to parse a value of type %s", "pg_dependencies")));
+	statistic_scanner_finish();
+	mvdependencies = mvdependencies_parse_result;
+
+	PG_RETURN_MVDependencies_P(statext_dependencies_serialize(mvdependencies));
 }
 
 /*
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 2d2a87d3a6..3eb26b3e6b 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1469,21 +1469,13 @@ pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
 /*
  * pg_mcv_list_in		- input routine for type pg_mcv_list.
  *
- * pg_mcv_list is real enough to be a table column, but it has no operations
- * of its own, and disallows input too
+ * converts serialized text MCV lists into a byte values by simply
+ * calling byeain().
  */
 Datum
 pg_mcv_list_in(PG_FUNCTION_ARGS)
 {
-	/*
-	 * pg_mcv_list stores the data in binary form and parsing text input is
-	 * not needed, so disallow this.
-	 */
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
-
-	PG_RETURN_VOID();			/* keep compiler quiet */
+	PG_RETURN_DATUM(byteain(fcinfo));
 }
 
 
diff --git a/src/backend/statistics/meson.build b/src/backend/statistics/meson.build
index e12737b011..0215d1f77f 100644
--- a/src/backend/statistics/meson.build
+++ b/src/backend/statistics/meson.build
@@ -6,3 +6,28 @@ backend_sources += files(
   'mcv.c',
   'mvdistinct.c',
 )
+
+ext_stats_parser_sources = []
+
+statistics_scanner = custom_target('statistics_scanner',
+  input: 'statistics_scanner.l',
+  output: 'statistics_scanner.c',
+  command: flex_cmd,
+)
+generated_sources += statistics_scanner
+ext_stats_parser_sources += statistics_scanner
+
+statistics_gram = custom_target('statistics_gram',
+  input: 'statistics_gram.y',
+  kwargs: bison_kw,
+)
+generated_sources += statistics_gram.to_list()
+ext_stats_parser_sources += statistics_gram
+
+stats_parser = static_library('stats_parser',
+  ext_stats_parser_sources,
+  dependencies: [backend_code],
+  include_directories: include_directories('.'),
+  kwargs: internal_lib_args,
+)
+backend_link_with += stats_parser
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index 13301a3157..4d5ff3c7e4 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -42,6 +42,8 @@ static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
 static int	n_choose_k(int n, int k);
 static int	num_combinations(int n);
 
+extern void statistic_scanner_init(const char *query_string);
+
 /* size of the struct header fields (magic, type, nitems) */
 #define SizeOfHeader		(3 * sizeof(uint32))
 
@@ -333,17 +335,26 @@ statext_ndistinct_deserialize(bytea *data)
  * pg_ndistinct_in
  *		input routine for type pg_ndistinct
  *
- * pg_ndistinct is real enough to be a table column, but it has no
- * operations of its own, and disallows input (just like pg_node_tree).
+ * converts the distinct from the external format in "string" to its internal
+ * format.
  */
 Datum
 pg_ndistinct_in(PG_FUNCTION_ARGS)
 {
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("cannot accept a value of type %s", "pg_ndistinct")));
-
-	PG_RETURN_VOID();			/* keep compiler quiet */
+	char	   *str = PG_GETARG_CSTRING(0);
+	MVNDistinct *mvndistinct;
+	int			parse_rc;
+
+	statistic_scanner_init(str);
+	parse_rc = statistic_yyparse();
+	if (parse_rc != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("failed to parse a value of type %s", "pg_ndistinct")));
+	statistic_scanner_finish();
+	mvndistinct = mvndistinct_parse_result;
+
+	PG_RETURN_MVNDistinct_P(statext_ndistinct_serialize(mvndistinct));
 }
 
 /*
diff --git a/src/backend/statistics/statistics_gram.y b/src/backend/statistics/statistics_gram.y
new file mode 100644
index 0000000000..f8adda8814
--- /dev/null
+++ b/src/backend/statistics/statistics_gram.y
@@ -0,0 +1,173 @@
+%{
+#include "postgres.h"
+
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
+
+MVNDistinct *mvndistinct_parse_result;
+MVDependencies *mvdependencies_parse_result;
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc.  This prevents
+ * memory leaks if we error out during parsing.  Note this only works with
+ * bison >= 2.0.  However, in bison 1.875 the default is to use alloca()
+ * if possible, so there's not really much problem anyhow, at least if
+ * you're building with gcc.
+ */
+#define YYMALLOC palloc
+#define YYFREE   pfree
+
+%}
+
+%expect 0
+%name-prefix="statistic_yy"
+
+
+%union {
+		uint32					uintval;
+		double					doubleval;
+
+		MVNDistinct				*ndistinct;
+		MVNDistinctItem				*ndistinct_item;
+
+		MVDependencies				*dependencies;
+		MVDependency				*dependency;
+
+		Bitmapset				*bitmap;
+		List					*list;
+}
+
+/* Non-keyword tokens */
+%token <uintval> UCONST
+%token <doubleval> DOUBLE
+%token ARROW
+
+%type <ndistinct>	ndistinct
+%type <ndistinct_item>	ndistinct_item
+%type <list>	ndistinct_item_list
+
+%type <dependencies>	dependencies
+%type <list>	dependency_item_list
+%type <dependency>	dependency_item
+
+%type <bitmap>	attrs
+
+%%
+
+extended_statistic:
+	ndistinct { } |
+	dependencies { }
+	;
+
+/*
+ * "ndistinct" rule helps to parse the input string recursively and stores the output into MVNDistinct structure.
+ * Exmple:
+ * 	intput : '{"1, 2": 1,"2, 3": 2, "3, 1", 2}'
+ * 	output : returns MVNDistinct object
+*/
+ndistinct:
+	'{' ndistinct_item_list '}'
+		{
+			ListCell *cell;
+			$$ = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
+						 list_length($2) * sizeof(MVNDistinctItem));
+			mvndistinct_parse_result = $$;
+			$$->magic = STATS_NDISTINCT_MAGIC;
+			$$->type = STATS_NDISTINCT_TYPE_BASIC;
+			$$->nitems = list_length($2);
+			MVNDistinctItem *pointer = $$->items;
+			foreach (cell, $2)
+			{
+				memcpy(pointer, lfirst(cell), sizeof(MVNDistinctItem));
+				pointer += 1;
+			}
+		}
+	;
+
+ndistinct_item_list:
+	ndistinct_item_list ',' ndistinct_item
+		{
+			$$ = lappend($1, $3);
+		}
+	| ndistinct_item { $$ = lappend(NIL, $1);}
+	;
+
+ndistinct_item:
+	'"' attrs '"' ':' DOUBLE
+		{
+			int attrCount = 0;
+			$$ = (MVNDistinctItem *)palloc0(sizeof(MVNDistinctItem));
+			$$->attributes = build_attnums_array($2, 0, &attrCount);
+			$$->nattributes = attrCount;
+			$$->ndistinct = $5;
+		}
+	;
+
+attrs:
+	attrs ',' UCONST
+		{
+			$$ = bms_add_member($1, $3);
+
+		}
+	| UCONST ',' UCONST
+		{
+			$$ = bms_make_singleton($1);
+			$$ = bms_add_member($$, $3);
+		}
+	;
+
+/*
+ * "dependencies" rule helps to parse the input string recursively and stores the output into MVDependencies structure.
+ * example:
+ *	intput : '{"1 => 2": 1.000000, "2 => 3": 2.000000}'
+ * 	output : returns MVDependencies	object
+*/
+dependencies:
+	'{' dependency_item_list '}'
+		{
+			$$ = palloc0(MAXALIGN(offsetof(MVDependencies, deps)) + list_length($2) * sizeof(MVDependency *));
+			mvdependencies_parse_result = $$;
+
+			$$->magic = STATS_DEPS_MAGIC;
+			$$->type = STATS_DEPS_TYPE_BASIC;
+			$$->ndeps = list_length($2);
+
+			for (int i=0; i<$$->ndeps; i++)
+			{
+				$$->deps[i] = list_nth($2, i);
+			}
+		}
+	;
+
+dependency_item_list:
+	dependency_item_list ',' dependency_item
+		{
+			$$ = lappend($1, $3);
+		}
+	| dependency_item { $$ = lappend(NIL, $1);}
+	;
+
+dependency_item:
+	'"' attrs ARROW UCONST '"' ':' DOUBLE
+	{
+		int attrCount = 0;
+		AttrNumber *ptr = build_attnums_array($2, 0, &attrCount);
+		$$ = (MVDependency *)palloc0(sizeof(MVDependency) + sizeof(AttrNumber) * (attrCount + 1));
+		$$->nattributes = attrCount + 1;
+		$$->degree = $7;
+		for (int i = 0; i < attrCount; i++)
+		{
+			$$->attributes[i] = *(ptr+i);
+		}
+		$$->attributes[$$->nattributes - 1] = $4;
+	}
+	| '"' UCONST ARROW UCONST '"' ':' DOUBLE
+	{
+		$$ = (MVDependency *)palloc0(sizeof(MVDependency) + sizeof(AttrNumber) * 2);
+		$$->nattributes = 2;
+		$$->degree = $7;
+		$$->attributes[0] = $2;
+		$$->attributes[1] = $4;
+	}
+	;
+%%
diff --git a/src/backend/statistics/statistics_scanner.l b/src/backend/statistics/statistics_scanner.l
new file mode 100644
index 0000000000..27cab8268e
--- /dev/null
+++ b/src/backend/statistics/statistics_scanner.l
@@ -0,0 +1,88 @@
+%{
+#include "postgres.h"
+
+#include "utils/builtins.h"
+#include "parser/scansup.h"
+
+#include "statistics/statistics.h"
+#include "statistics_gram.h"
+
+/* Handle to the buffer that the lexer uses internally */
+static YY_BUFFER_STATE scanbufhandle;
+%}
+
+
+%option noinput
+%option nounput
+%option noyywrap
+%option prefix="statistic_yy"
+
+digit			[0-9]+
+double			[0-9.]
+
+%%
+
+[ \t\n]		;
+"{"			{ return '{'; }
+"}"			{ return '}'; }
+","			{ return ','; }
+"\""			{ return '"'; }
+":"			{ return ':'; }
+"=>"			{ return ARROW; }
+
+{digit}+		{
+				statistic_yylval.uintval = strtoul(yytext, NULL, 10);
+				return UCONST;
+			}
+{double}+		{
+				statistic_yylval.doubleval = strtod(yytext, NULL);
+				return DOUBLE;
+			}
+
+%%
+
+void
+statistic_yyerror(const char *message)
+{
+	if (*yytext == YY_END_OF_BUFFER_CHAR)
+	{
+		ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
+			errmsg("invalid input syntax for extended stats type"),
+			errdetail("%s at end of input", message)));
+	}
+	else
+	{
+		ereport(ERROR,
+			(errcode(ERRCODE_SYNTAX_ERROR),
+			errmsg("invalid input syntax for extended stats type"),
+			errdetail("%s at or near \"%s\"", message, yytext)));
+	}
+}
+
+void
+statistic_scanner_init(const char *str)
+{
+	Size		slen = strlen(str);
+	char	   *scanbuf;
+
+	/*
+	 * Might be left over after ereport()
+	 */
+	if (YY_CURRENT_BUFFER)
+		yy_delete_buffer(YY_CURRENT_BUFFER);
+
+	/*
+	 * Make a scan buffer with special termination needed by flex.
+	 */
+	scanbuf = (char *) palloc(slen + 2);
+	memcpy(scanbuf, str, slen);
+	scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
+	scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
+}
+
+void
+statistic_scanner_finish(void)
+{
+	yy_delete_buffer(scanbufhandle);
+	scanbufhandle = NULL;
+}
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 7b55eb8ffa..a58b6c5ad3 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -127,4 +127,17 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root,
 											 Selectivity *overlap_basesel,
 											 Selectivity *totalsel);
 
+/*
+ * Internal functions for parsing the statistics grammar, in statiatics_gram.y and
+ * statistics_scanner.l
+ */
+extern int	statistic_yyparse(void);
+extern int	statistic_yylex(void);
+extern void statistic_yyerror(const char *str);
+extern void statistic_scanner_init(const char *query_string);
+extern void statistic_scanner_finish(void);
+
+extern MVNDistinct *mvndistinct_parse_result;
+extern MVDependencies *mvdependencies_parse_result;
+
 #endif							/* EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 17e3e7f881..acd5fdee0a 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -22,6 +22,9 @@
 #define STATS_NDISTINCT_MAGIC		0xA352BFA4	/* struct identifier */
 #define STATS_NDISTINCT_TYPE_BASIC	1	/* struct version */
 
+#define PG_RETURN_MVNDistinct_P(X) return PointerGetDatum(X)
+#define PG_RETURN_MVDependencies_P(X) return PointerGetDatum(X)
+
 /* MVNDistinctItem represents a single combination of columns */
 typedef struct MVNDistinctItem
 {
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 03880874c1..6e7bab223f 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -3290,3 +3290,283 @@ NOTICE:  drop cascades to 2 other objects
 DETAIL:  drop cascades to table tststats.priv_test_tbl
 drop cascades to view tststats.priv_test_view
 DROP USER regress_stats_user1;
+    -- Test pg_ndistinct_in
+drop table if exists tbl_distinct;
+NOTICE:  table "tbl_distinct" does not exist, skipping
+create table tbl_distinct(i int, ii pg_ndistinct);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into tbl_distinct values (1, '{"1, 2": 1}');
+insert into tbl_distinct values (2, '{"1, 2": 2, "1, 3": 3, "2, 3": 2, "1, 2, 3": 3}');
+insert into tbl_distinct values (3, '{"123, 234": 11}');
+select * from tbl_distinct;
+ i |                       ii
+---+-------------------------------------------------
+ 2 | {"1, 2": 2, "1, 3": 3, "2, 3": 2, "1, 2, 3": 3}
+ 3 | {"123, 234": 11}
+ 1 | {"1, 2": 1}
+(3 rows)
+
+-- leading space
+insert into tbl_distinct values (1, ' {"1, 2": 1}');
+-- trailing space
+insert into tbl_distinct values (1, '{"1, 2": 1} ');
+-- unmatched quote
+insert into tbl_distinct values (1, '{"1", 2": 1} ');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1", 2": 1} ');
+                                            ^
+DETAIL:  syntax error at or near """
+-- space in attribute list
+insert into tbl_distinct values (1, '{"1 3, 2": 1} ');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1 3, 2": 1} ');
+                                            ^
+DETAIL:  syntax error at or near "3"
+-- colon in attribute list
+insert into tbl_distinct values (1, '{"1: 2": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1: 2": 1}');
+                                            ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_distinct values (1, '{"1, 2:" 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1, 2:" 1}');
+                                            ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_distinct values (1, '{":1 2": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{":1 2": 1}');
+                                            ^
+DETAIL:  syntax error at or near ":"
+-- zero/single item attribute list
+insert into tbl_distinct values (1, '{"1": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1": 1}');
+                                            ^
+DETAIL:  syntax error at or near """
+insert into tbl_distinct values (1, '{: 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{: 1}');
+                                            ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_distinct values (1, '{"": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"": 1}');
+                                            ^
+DETAIL:  syntax error at or near """
+insert into tbl_distinct values (1, '{" ": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{" ": 1}');
+                                            ^
+DETAIL:  syntax error at or near """
+insert into tbl_distinct values (1, '{}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{}');
+                                            ^
+DETAIL:  syntax error at or near "}"
+insert into tbl_distinct values (1, '{:}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{:}');
+                                            ^
+DETAIL:  syntax error at or near ":"
+-- illegal character
+insert into tbl_distinct values (1, '{"1,| 2": 1}');
+-- multiple consecutive characters
+insert into tbl_distinct values (1, '{"1,, 2": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1,, 2": 1}');
+                                            ^
+DETAIL:  syntax error at or near ","
+insert into tbl_distinct values (1, '{"1": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1": 1}');
+                                            ^
+DETAIL:  syntax error at or near """
+-- Need to add check on catalog table insert that atribute numbers are legal
+-- (e.g. there shouldn't be attribute number 100 for a table with only 2
+-- columns also it should match)
+select * from tbl_distinct;
+ i |                       ii
+---+-------------------------------------------------
+ 1 | {"1, 2": 1}
+ 1 | {"1, 2": 1}
+ 1 | {"1, 2": 1}
+ 1 | {"1, 2": 1}
+ 2 | {"1, 2": 2, "1, 3": 3, "2, 3": 2, "1, 2, 3": 3}
+ 3 | {"123, 234": 11}
+(6 rows)
+
+    -- Test pg_dependencies_in
+drop table if exists tbl_dependencies;
+NOTICE:  table "tbl_dependencies" does not exist, skipping
+create table tbl_dependencies(i int, ii pg_dependencies);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000}');
+insert into tbl_dependencies values (2, '{"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}');
+insert into tbl_dependencies values (1, '{"1, 2, 3, 5 => 4": 3.000000}');
+select * from tbl_dependencies;
+ i |                                         ii
+---+-------------------------------------------------------------------------------------
+ 1 | {"1 => 2": 1.000000}
+ 1 | {"1, 2, 3, 5 => 4": 3.000000}
+ 2 | {"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}
+(3 rows)
+
+-- leading space
+insert into tbl_dependencies values (1, ' {"1 => 2": 1.000000}');
+-- trailing space
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000} ');
+-- unmatched quote
+insert into tbl_dependencies values (1, '{"1" => 2": 1.000000} ');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1" => 2": 1.00000...
+                                                ^
+DETAIL:  syntax error at or near """
+-- Wrong format
+insert into tbl_dependencies values (1, '{"1, 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1, 2": 1.000000}'...
+                                                ^
+DETAIL:  syntax error at or near "1.000000"
+insert into tbl_dependencies values (1, '{"1 => 2": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1 => 2": 1}');
+                                                ^
+DETAIL:  syntax error at or near "1"
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000, " 2 => 1": 2}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1 => 2": 1.000000...
+                                                ^
+DETAIL:  syntax error at or near "2"
+-- space in attribute list
+insert into tbl_dependencies values (1, '{"1 3 => 2": 1.000000} ');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1 3 => 2": 1.0000...
+                                                ^
+DETAIL:  syntax error at or near "3"
+-- colon in attribute list
+insert into tbl_dependencies values (1, '{"1: 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1: 2": 1.000000}'...
+                                                ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_dependencies values (1, '{"1 => 2:" 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1 => 2:" 1.000000...
+                                                ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_dependencies values (1, '{":1 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{":1 2": 1.000000}'...
+                                                ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_dependencies values (1, '{"1, 2" 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1, 2" 1.000000}')...
+                                                ^
+DETAIL:  syntax error at or near "1.000000"
+-- zero/single item attribute list
+insert into tbl_dependencies values (1, '{"1": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1": 1.000000}');
+                                                ^
+DETAIL:  syntax error at or near """
+insert into tbl_dependencies values (1, '{: 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{: 1.000000}');
+                                                ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_dependencies values (1, '{"": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"": 1.000000}');
+                                                ^
+DETAIL:  syntax error at or near """
+insert into tbl_dependencies values (1, '{" ": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{" ": 1.000000}');
+                                                ^
+DETAIL:  syntax error at or near """
+insert into tbl_dependencies values (1, '{}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{}');
+                                                ^
+DETAIL:  syntax error at or near "}"
+insert into tbl_dependencies values (1, '{:}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{:}');
+                                                ^
+DETAIL:  syntax error at or near ":"
+-- multiple consecutive characters
+insert into tbl_dependencies values (1, '{"1 =>=> 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1 =>=> 2": 1.0000...
+                                                ^
+DETAIL:  syntax error at or near "=>"
+insert into tbl_dependencies values (1, '{"1": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1": 1.000000}');
+                                                ^
+DETAIL:  syntax error at or near """
+select * from tbl_dependencies;
+ i |                                         ii
+---+-------------------------------------------------------------------------------------
+ 2 | {"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}
+ 1 | {"1 => 2": 1.000000}
+ 1 | {"1, 2, 3, 5 => 4": 3.000000}
+ 1 | {"1 => 2": 1.000000}
+ 1 | {"1 => 2": 1.000000}
+(5 rows)
+
+-- Test a table with columns of type pg_ndistinct and pg_dependencies
+drop table if exists tbl_dist_dep;
+NOTICE:  table "tbl_dist_dep" does not exist, skipping
+create table tbl_dist_dep(i pg_ndistinct, ii pg_dependencies);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1 => 2": 1.000000}');
+insert into tbl_dist_dep values ('{"1, 2" : 3, "1, 3" : 3, "2, 3" : 2, "1, 2, 3" : 3}', '{"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}');
+-- unmatched quote
+insert into tbl_dist_dep values ('{1, 2 : 1}', '{"1 => 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{1, 2 : 1}', '{"1 => 2": 1...
+                                         ^
+DETAIL:  syntax error at or near "1"
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{1 => 2: 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{"1, 2" : 1}', '{1 => 2: 1...
+                                                         ^
+DETAIL:  syntax error at or near "1"
+insert into tbl_dist_dep values ('{1, 2 : 1}', '{1 => 2: 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{1, 2 : 1}', '{1 => 2: 1.0...
+                                         ^
+DETAIL:  syntax error at or near "1"
+-- Invalid type
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1 => 2": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1 => 2":...
+                                                         ^
+DETAIL:  syntax error at or near "1"
+insert into tbl_dist_dep values ('{"1, 2" : 1.000000}', '{"1 => 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{"1, 2" : 1.000000}', '{"1...
+                                         ^
+DETAIL:  syntax error at or near "1.000000"
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1.000000 => 2.000000": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1.000000...
+                                                         ^
+DETAIL:  syntax error at or near "1.000000"
+insert into tbl_dist_dep values ('{"1, 2.000000" : 1}', '{"1 => 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{"1, 2.000000" : 1}', '{"1...
+                                         ^
+DETAIL:  syntax error at or near "2.000000"
+select * from tbl_dist_dep;
+                        i                        |                                         ii
+-------------------------------------------------+-------------------------------------------------------------------------------------
+ {"1, 2": 3, "1, 3": 3, "2, 3": 2, "1, 2, 3": 3} | {"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}
+ {"1, 2": 1}                                     | {"1 => 2": 1.000000}
+(2 rows)
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index d0d42cd013..98943e90d4 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1663,3 +1663,102 @@ DROP FUNCTION op_leak(int, int);
 RESET SESSION AUTHORIZATION;
 DROP SCHEMA tststats CASCADE;
 DROP USER regress_stats_user1;
+
+    -- Test pg_ndistinct_in
+drop table if exists tbl_distinct;
+create table tbl_distinct(i int, ii pg_ndistinct);
+insert into tbl_distinct values (1, '{"1, 2": 1}');
+insert into tbl_distinct values (2, '{"1, 2": 2, "1, 3": 3, "2, 3": 2, "1, 2, 3": 3}');
+insert into tbl_distinct values (3, '{"123, 234": 11}');
+select * from tbl_distinct;
+
+-- leading space
+insert into tbl_distinct values (1, ' {"1, 2": 1}');
+-- trailing space
+insert into tbl_distinct values (1, '{"1, 2": 1} ');
+-- unmatched quote
+insert into tbl_distinct values (1, '{"1", 2": 1} ');
+-- space in attribute list
+insert into tbl_distinct values (1, '{"1 3, 2": 1} ');
+-- colon in attribute list
+insert into tbl_distinct values (1, '{"1: 2": 1}');
+insert into tbl_distinct values (1, '{"1, 2:" 1}');
+insert into tbl_distinct values (1, '{":1 2": 1}');
+-- zero/single item attribute list
+insert into tbl_distinct values (1, '{"1": 1}');
+insert into tbl_distinct values (1, '{: 1}');
+insert into tbl_distinct values (1, '{"": 1}');
+insert into tbl_distinct values (1, '{" ": 1}');
+insert into tbl_distinct values (1, '{}');
+insert into tbl_distinct values (1, '{:}');
+-- illegal character
+insert into tbl_distinct values (1, '{"1,| 2": 1}');
+
+-- multiple consecutive characters
+insert into tbl_distinct values (1, '{"1,, 2": 1}');
+insert into tbl_distinct values (1, '{"1": 1}');
+
+-- Need to add check on catalog table insert that atribute numbers are legal
+-- (e.g. there shouldn't be attribute number 100 for a table with only 2
+-- columns also it should match)
+
+select * from tbl_distinct;
+
+
+-- Test pg_dependencies_in
+drop table if exists tbl_dependencies;
+create table tbl_dependencies(i int, ii pg_dependencies);
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000}');
+insert into tbl_dependencies values (2, '{"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}');
+insert into tbl_dependencies values (1, '{"1, 2, 3, 5 => 4": 3.000000}');
+
+select * from tbl_dependencies;
+
+-- leading space
+insert into tbl_dependencies values (1, ' {"1 => 2": 1.000000}');
+-- trailing space
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000} ');
+-- unmatched quote
+insert into tbl_dependencies values (1, '{"1" => 2": 1.000000} ');
+-- Wrong format
+insert into tbl_dependencies values (1, '{"1, 2": 1.000000}');
+insert into tbl_dependencies values (1, '{"1 => 2": 1}');
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000, " 2 => 1": 2}');
+-- space in attribute list
+insert into tbl_dependencies values (1, '{"1 3 => 2": 1.000000} ');
+-- colon in attribute list
+insert into tbl_dependencies values (1, '{"1: 2": 1.000000}');
+insert into tbl_dependencies values (1, '{"1 => 2:" 1.000000}');
+insert into tbl_dependencies values (1, '{":1 2": 1.000000}');
+insert into tbl_dependencies values (1, '{"1, 2" 1.000000}');
+-- zero/single item attribute list
+insert into tbl_dependencies values (1, '{"1": 1.000000}');
+insert into tbl_dependencies values (1, '{: 1.000000}');
+insert into tbl_dependencies values (1, '{"": 1.000000}');
+insert into tbl_dependencies values (1, '{" ": 1.000000}');
+insert into tbl_dependencies values (1, '{}');
+insert into tbl_dependencies values (1, '{:}');
+
+-- multiple consecutive characters
+insert into tbl_dependencies values (1, '{"1 =>=> 2": 1.000000}');
+insert into tbl_dependencies values (1, '{"1": 1.000000}');
+
+select * from tbl_dependencies;
+
+
+-- Test a table with columns of type pg_ndistinct and pg_dependencies
+drop table if exists tbl_dist_dep;
+create table tbl_dist_dep(i pg_ndistinct, ii pg_dependencies);
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1 => 2": 1.000000}');
+insert into tbl_dist_dep values ('{"1, 2" : 3, "1, 3" : 3, "2, 3" : 2, "1, 2, 3" : 3}', '{"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}');
+-- unmatched quote
+insert into tbl_dist_dep values ('{1, 2 : 1}', '{"1 => 2": 1.000000}');
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{1 => 2: 1.000000}');
+insert into tbl_dist_dep values ('{1, 2 : 1}', '{1 => 2: 1.000000}');
+-- Invalid type
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1 => 2": 1}');
+insert into tbl_dist_dep values ('{"1, 2" : 1.000000}', '{"1 => 2": 1.000000}');
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1.000000 => 2.000000": 1.000000}');
+insert into tbl_dist_dep values ('{"1, 2.000000" : 1}', '{"1 => 2": 1.000000}');
+
+select * from tbl_dist_dep;
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index ee49424d6f..96d384f940 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -204,6 +204,9 @@ sub mkvcbuild
 		'src/backend/replication', 'repl_scanner.l',
 		'repl_gram.y',             'syncrep_scanner.l',
 		'syncrep_gram.y');
+	$postgres->AddFiles(
+		'src/backend/statistics', 'statistics_scanner.l',
+		'statistics_gram.y');
 	$postgres->AddFiles('src/backend/utils/adt', 'jsonpath_scan.l',
 		'jsonpath_gram.y');
 	$postgres->AddDefine('BUILDING_DLL');
diff --git a/src/tools/msvc/clean.bat b/src/tools/msvc/clean.bat
index d0e8bfbf86..146313eca2 100755
--- a/src/tools/msvc/clean.bat
+++ b/src/tools/msvc/clean.bat
@@ -90,7 +90,8 @@ if %DIST%==1 if exist src\backend\replication\repl_scanner.c del /q src\backend\
 if %DIST%==1 if exist src\backend\replication\repl_gram.c del /q src\backend\replication\repl_gram.c
 if %DIST%==1 if exist src\backend\replication\syncrep_scanner.c del /q src\backend\replication\syncrep_scanner.c
 if %DIST%==1 if exist src\backend\replication\syncrep_gram.c del /q src\backend\replication\syncrep_gram.c
-
+if %DIST%==1 if exist src\backend\statistics\statistics_scanner.c del /q src\backend\statistics\statistics_scanner.c
+if %DIST%==1 if exist src\backend\statistics\statistics_gram.c del /q src\backend\statistics\statistics_gram.c
 
 if exist src\interfaces\libpq\libpqdll.def del /q src\interfaces\libpq\libpqdll.def
 if exist src\interfaces\ecpg\compatlib\compatlib.def del /q src\interfaces\ecpg\compatlib\compatlib.def
-- 
2.30.1 (Apple Git-130)

