From 3da1e483571ca91191f133e0df1de7009483cd9f Mon Sep 17 00:00:00 2001
From: "David E. Wheeler" <david@justatheory.com>
Date: Mon, 5 Feb 2024 09:09:18 -0500
Subject: [PATCH v2] Add parse_type() SQL function

The `parse_type()` function uses the underlying `parseTypeString()` C
function to parse a string representing a data type into a type ID and
typmod suitabld for passing to `format_type()`. This allows one to
derive the formal SQL name for a type from a string that may be an
alias:

    SELECT format_type(p.typid, p.typmod)
      FROM parse_type('timestamp(4)') p;

            format_type
    --------------------------------
    timestamp(4) without time zone

Also accounts for data typs that require the SQL grammar to be parsed:

    SELECT format_type(p.typid, p.typmod)
      FROM parse_type('interval second(0)') p;

        format_type
    --------------------
    interval second(0)

Useful for unit tests for against column data types, for example.
Originally written by Erik Wienhold for use in pgTAP.
---
 doc/src/sgml/func.sgml                    | 32 ++++++++++-
 src/backend/utils/adt/format_type.c       | 70 +++++++++++++++++++++++
 src/include/catalog/pg_proc.dat           |  4 ++
 src/include/utils/builtins.h              |  2 +
 src/test/regress/expected/create_type.out | 53 +++++++++++++++++
 src/test/regress/sql/create_type.sql      | 28 +++++++++
 6 files changed, 188 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 6788ba8ef4..5344a870ed 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -24754,7 +24754,7 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
 
      <tbody>
       <row>
-       <entry role="func_table_entry"><para role="func_signature">
+       <entry id="format_type" role="func_table_entry"><para role="func_signature">
         <indexterm>
          <primary>format_type</primary>
         </indexterm>
@@ -24768,6 +24768,36 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
        </para></entry>
       </row>
 
+      <row>
+       <entry id="parse_type" role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>parse_type</primary>
+        </indexterm>
+        <function>parse_type</function> ( <parameter>type</parameter> <type>text</type> )
+        <returnvalue>record</returnvalue>
+        ( <parameter>typid</parameter> <type>oid</type>,
+          <parameter>typmod</parameter> <type>int4</type> )
+       </para>
+       <para>
+        Parses a string representing an SQL type declaration as used in a
+        <command>CREATE TABLE</command> statement, optionally schema-qualified.
+        Returns a record with two fields, <parameter>typid</parameter> and
+        <parameter>typmod</parameter>, representing the OID and modifier for the
+        type. These correspond to the parameters to pass to the
+        <link linkend="format_type"><function>format_type</function> function.</link>
+        Returns a null record on null input.
+       </para>
+       <para>
+        For example:
+<programlisting>
+SELECT format_type(p.typid, p.typmod) FROM parse_type('timestamp(4)') p;
+        format_type
+ --------------------------------
+  timestamp(4) without time zone
+</programlisting>
+       </para></entry>
+      </row>
+
       <row>
        <entry id="pg-char-to-encoding" role="func_table_entry"><para role="func_signature">
         <indexterm>
diff --git a/src/backend/utils/adt/format_type.c b/src/backend/utils/adt/format_type.c
index 28ba0fbd19..91e1064a34 100644
--- a/src/backend/utils/adt/format_type.c
+++ b/src/backend/utils/adt/format_type.c
@@ -26,6 +26,9 @@
 #include "utils/lsyscache.h"
 #include "utils/numeric.h"
 #include "utils/syscache.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "parser/parse_type.h"
 
 static char *printTypmod(const char *typname, int32 typmod, Oid typmodout);
 
@@ -482,3 +485,70 @@ oidvectortypes(PG_FUNCTION_ARGS)
 
 	PG_RETURN_TEXT_P(cstring_to_text(result));
 }
+
+/*
+ * Given a string that is supposed to be a SQL-compatible type declaration,
+ * such as "int4" or "integer" or "character varying(32)", parse
+ * the string and convert it to a type OID and type modifier.
+ *
+ * Raises an error on an invalid type.
+ */
+
+/*
+ * parse_type() is the inverse of pg_catalog.format_type(): it takes a string
+ * representing an SQL-compatible type declaration, such as "int4" or "integer"
+ * or "character varying(32)", parses it, and returns the OID and type modifier.
+ * Returns NULL for an invalid type.
+ *
+ * Internally it relies on the Postgres core parseTypeString() function defined
+ * in src/backend/parser/parse_type.c.
+ */
+PG_FUNCTION_INFO_V1(parse_type);
+
+Datum
+parse_type(PG_FUNCTION_ARGS)
+{
+#define PARSE_TYPE_STRING_COLS 2 /* Returns two columns. */
+	const char *type;            /* the type string we want to resolve */
+	Oid         typid;           /* the resolved type oid */
+	int32       typmod;          /* the resolved type modifier */
+	TupleDesc   tupdesc;
+	HeapTuple   rettuple;
+	Datum       values[PARSE_TYPE_STRING_COLS] = {0};
+	bool        nulls[PARSE_TYPE_STRING_COLS] = {0};
+
+	if (PG_ARGISNULL(0)) {
+        PG_RETURN_NULL();
+	}
+
+	type = text_to_cstring(PG_GETARG_TEXT_PP(0));
+
+	/*
+	 * Build a tuple descriptor for our result type; return an error if not
+	 * called in a context that expects a record.
+	 */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) {
+		ereport(
+			ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			errmsg("function returning record called in context that cannot accept type record"))
+		);
+	}
+
+	BlessTupleDesc(tupdesc);
+
+	/*
+	 * Parse type-name argument to obtain type OID and encoded typmod. We don't
+	 * need to check for parseTypeString failure, but just let the error be
+	 * raised. The 0 arg works both as the `Node *escontext` arg in Postgres 16
+	 * and the `bool missing_ok` arg in 9.4-15.
+	 */
+	(void) parseTypeString(type, &typid, &typmod, 0);
+
+	/* Create and return tuple. */
+	values[0] = typid;
+	values[1] = typmod;
+	rettuple = heap_form_tuple(tupdesc, values, nulls);
+	return HeapTupleGetDatum(rettuple);
+#undef PARSE_TYPE_STRING_COLS
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 29af4ce65d..31d7024010 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -2184,6 +2184,10 @@
 { oid => '1081', descr => 'format a type oid and atttypmod to canonical SQL',
   proname => 'format_type', proisstrict => 'f', provolatile => 's',
   prorettype => 'text', proargtypes => 'oid int4', prosrc => 'format_type' },
+{ oid => '8401', descr => 'parse a type string into its a type oid and atttypmod',
+  proname => 'parse_type', proisstrict => 'f', provolatile => 's',
+  prorettype => 'record', proargtypes => 'text', prosrc => 'parse_type',
+  proallargtypes => '{text,oid,int4}', proargmodes => '{i,o,o}', proargnames => '{typname,typid,typmod}' },
 { oid => '1084', descr => 'I/O',
   proname => 'date_in', provolatile => 's', prorettype => 'date',
   proargtypes => 'cstring', prosrc => 'date_in' },
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 359c570f23..264ad090be 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -133,6 +133,8 @@ extern char *format_type_with_typemod(Oid type_oid, int32 typemod);
 
 extern int32 type_maximum_size(Oid type_oid, int32 typemod);
 
+extern Datum parse_type(PG_FUNCTION_ARGS);
+
 /* quote.c */
 extern char *quote_literal_cstr(const char *rawstr);
 
diff --git a/src/test/regress/expected/create_type.out b/src/test/regress/expected/create_type.out
index 7383fcdbb1..84ea71090c 100644
--- a/src/test/regress/expected/create_type.out
+++ b/src/test/regress/expected/create_type.out
@@ -249,6 +249,59 @@ select format_type('bpchar'::regtype, -1);
  bpchar
 (1 row)
 
+-- Test parse_type
+SELECT * FROM parse_type('text') p(typid, typmod);
+ typid | typmod 
+-------+--------
+    25 |     -1
+(1 row)
+
+SELECT * FROM parse_type(NULL) p(typid, typmod);
+ typid | typmod 
+-------+--------
+       |       
+(1 row)
+
+-- Test parse_type errors
+SELECT parse_type('nonesuch'); -- error expected
+ERROR:  type "nonesuch" does not exist
+SELECT parse_type('interval nonesuch'); -- grammar error expected
+ERROR:  syntax error at or near "nonesuch"
+LINE 1: SELECT parse_type('interval nonesuch');
+                 ^
+CONTEXT:  invalid type name "interval nonesuch"
+SELECT parse_type('year(4)'); -- grammar error expected
+ERROR:  type "year" does not exist
+-- Test parse_type with various aliases and grammar-based types
+WITH s(s) AS (
+    SELECT * FROM unnest(ARRAY[
+        'timestamp(4)',
+        'interval(0)',
+        'interval second(0)',
+        'timestamptz',
+        'timestamptz(6)',
+        'varchar',
+        'varchar(128)',
+        'mytab'
+    ])
+),
+p(typid, typmod) AS (
+    SELECT ((parse_type(s)).*)
+      FROM s
+)
+SELECT format_type(typid, typmod) FROM p;
+          format_type           
+--------------------------------
+ timestamp(4) without time zone
+ interval(0)
+ interval second(0)
+ timestamp with time zone
+ timestamp(6) with time zone
+ character varying
+ character varying(128)
+ mytab
+(8 rows)
+
 -- Test non-error-throwing APIs using widget, which still throws errors
 SELECT pg_input_is_valid('(1,2,3)', 'widget');
  pg_input_is_valid 
diff --git a/src/test/regress/sql/create_type.sql b/src/test/regress/sql/create_type.sql
index c25018029c..e4700b8719 100644
--- a/src/test/regress/sql/create_type.sql
+++ b/src/test/regress/sql/create_type.sql
@@ -192,6 +192,34 @@ select format_type('bpchar'::regtype, null);
 -- this behavior difference is intentional
 select format_type('bpchar'::regtype, -1);
 
+-- Test parse_type
+SELECT * FROM parse_type('text') p(typid, typmod);
+SELECT * FROM parse_type(NULL) p(typid, typmod);
+
+-- Test parse_type errors
+SELECT parse_type('nonesuch'); -- error expected
+SELECT parse_type('interval nonesuch'); -- grammar error expected
+SELECT parse_type('year(4)'); -- grammar error expected
+
+-- Test parse_type with various aliases and grammar-based types
+WITH s(s) AS (
+    SELECT * FROM unnest(ARRAY[
+        'timestamp(4)',
+        'interval(0)',
+        'interval second(0)',
+        'timestamptz',
+        'timestamptz(6)',
+        'varchar',
+        'varchar(128)',
+        'mytab'
+    ])
+),
+p(typid, typmod) AS (
+    SELECT ((parse_type(s)).*)
+      FROM s
+)
+SELECT format_type(typid, typmod) FROM p;
+
 -- Test non-error-throwing APIs using widget, which still throws errors
 SELECT pg_input_is_valid('(1,2,3)', 'widget');
 SELECT pg_input_is_valid('(1,2)', 'widget');  -- hard error expected
-- 
2.43.0

