From 9f606df68379556b5bf155fcc69f979d7b9f743e Mon Sep 17 00:00:00 2001
From: Aleksander Alekseev <aleksander@timescale.com>
Date: Tue, 12 Apr 2022 15:58:11 +0300
Subject: [PATCH v9] Compression dictionaries for JSONB

Usage example:

CREATE TYPE mydict AS DICTIONARY OF jsonb ('aaa', 'bbb', ...);
SELECT ('{"aaa":"bbb"}' ::  mydict) -> 'aaa';

The created type works as a drop-in replacement for JSONB. However,
its internal representation differs. The provided dictionary entries ('aaa',
'bbb', ..) are stored in pg_dict catalog table. When `mydict` sees one of
the entries in the document, it replaces it with the corresponding pg_dict
entry Oid. For more details regarding the compression algorithm and chosen
compromises please see the comments in the code.

In pg_type `mydict` has typtype = TYPTYPE_DICT. It works the same way
as TYPTYPE_BASE with one difference: the corresponding `<type>_in`
(pg_type.typinput) and `<another-type>_<type>` (pg_cast.castfunc)
procedures receive the dictionary Oid as a `typmod` argument. This way the
procedures can distinguish `mydict1` from `mydict2` and use the proper
compression dictionary.

(NOTE: dear committer, please bump the catalog version!)

Author: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-by: Matthias van de Meent, Nikita Malakhov
Discussion: https://postgr.es/m/CAJ7c6TOtAB0z1UrksvGTStNE-herK-43bj22%3D5xVBg7S4vr5rQ%40mail.gmail.com
Discussion: https://postgr.es/m/CAJ7c6TPx7N-bVw0dZ1ASCDQKZJHhBYkT6w4HV1LzfS%2BUUTUfmA%40mail.gmail.com
---
 src/backend/catalog/Makefile             |   3 +-
 src/backend/catalog/meson.build          |   1 +
 src/backend/catalog/pg_dict.c            | 197 +++++++++
 src/backend/commands/typecmds.c          | 171 +++++++-
 src/backend/executor/functions.c         |   1 +
 src/backend/parser/gram.y                |   8 +
 src/backend/parser/parse_coerce.c        |  17 +-
 src/backend/parser/parse_type.c          |  16 +
 src/backend/tcop/utility.c               |  13 +
 src/backend/utils/adt/Makefile           |   1 +
 src/backend/utils/adt/dictionaries.c     | 482 +++++++++++++++++++++++
 src/backend/utils/adt/meson.build        |   1 +
 src/backend/utils/fmgr/funcapi.c         |   1 +
 src/bin/pg_dump/pg_dump.c                |   2 +
 src/include/catalog/meson.build          |   1 +
 src/include/catalog/pg_dict.h            |  78 ++++
 src/include/catalog/pg_proc.dat          |  20 +
 src/include/catalog/pg_type.h            |   1 +
 src/include/commands/typecmds.h          |   1 +
 src/include/nodes/parsenodes.h           |  12 +
 src/pl/plpgsql/src/pl_comp.c             |   1 +
 src/test/regress/expected/dict.out       |  47 +++
 src/test/regress/expected/oidjoins.out   |   1 +
 src/test/regress/expected/opr_sanity.out |  14 +-
 src/test/regress/parallel_schedule       |   2 +-
 src/test/regress/sql/dict.sql            |  20 +
 26 files changed, 1101 insertions(+), 11 deletions(-)
 create mode 100644 src/backend/catalog/pg_dict.c
 create mode 100644 src/backend/utils/adt/dictionaries.c
 create mode 100644 src/include/catalog/pg_dict.h
 create mode 100644 src/test/regress/expected/dict.out
 create mode 100644 src/test/regress/sql/dict.sql

diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 89a0221ec9..3b4330148e 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -33,6 +33,7 @@ OBJS = \
 	pg_conversion.o \
 	pg_db_role_setting.o \
 	pg_depend.o \
+	pg_dict.o \
 	pg_enum.o \
 	pg_inherits.o \
 	pg_largeobject.o \
@@ -61,7 +62,7 @@ CATALOG_HEADERS := \
 	pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
 	pg_statistic.h pg_statistic_ext.h pg_statistic_ext_data.h \
 	pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
-	pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
+	pg_cast.h pg_dict.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
 	pg_database.h pg_db_role_setting.h pg_tablespace.h \
 	pg_authid.h pg_auth_members.h pg_shdepend.h pg_shdescription.h \
 	pg_ts_config.h pg_ts_config_map.h pg_ts_dict.h \
diff --git a/src/backend/catalog/meson.build b/src/backend/catalog/meson.build
index 0874791451..ab98236c4c 100644
--- a/src/backend/catalog/meson.build
+++ b/src/backend/catalog/meson.build
@@ -18,6 +18,7 @@ backend_sources += files(
   'pg_conversion.c',
   'pg_db_role_setting.c',
   'pg_depend.c',
+  'pg_dict.c',
   'pg_enum.c',
   'pg_inherits.c',
   'pg_largeobject.c',
diff --git a/src/backend/catalog/pg_dict.c b/src/backend/catalog/pg_dict.c
new file mode 100644
index 0000000000..456f3b5177
--- /dev/null
+++ b/src/backend/catalog/pg_dict.c
@@ -0,0 +1,197 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_dict.c
+ *	  routines to support manipulation of the pg_dict relation
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/catalog/pg_dict.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/genam.h"
+#include "access/skey.h"
+#include "access/table.h"
+#include "catalog/catalog.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_dict.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+/*
+ * Creates an entry in pg_dict for each of the supplied values.
+ * vals is a list of String values.
+ */
+void
+DictEntriesCreate(Oid dictTypeOid, List *vals)
+{
+	Relation	pg_dict;
+	NameData	dictentry;
+	Datum		values[Natts_pg_dict];
+	bool		nulls[Natts_pg_dict];
+	ListCell   *lc;
+	HeapTuple	tup;
+
+	if (vals == NIL)
+	{
+		/* The list is empty; do nothing. */
+		return;
+	}
+
+	memset(nulls, false, sizeof(nulls));
+
+	/*
+	 * We don't check the list of values for duplicates here. If there are
+	 * any, the user will get a unique-index violation.
+	 */
+
+	pg_dict = table_open(DictRelationId, RowExclusiveLock);
+	foreach(lc, vals)
+	{
+		Oid			oid = GetNewOidWithIndex(pg_dict, DictOidIndexId,
+											 Anum_pg_dict_oid);
+		char	   *entry = strVal(lfirst(lc));
+
+		/*
+		 * Entries are stored in a name field, for easier syscache lookup, so
+		 * check the length to make sure it's within range.
+		 */
+		if (strlen(entry) > (NAMEDATALEN - 1))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_NAME),
+					 errmsg("invalid dict entry \"%s\"", entry),
+					 errdetail("Entries must be %d bytes or less.",
+							   NAMEDATALEN - 1)));
+
+		/* AALEKSEEV TODO use bytea instead */
+		namestrcpy(&dictentry, entry);
+
+		values[Anum_pg_dict_oid - 1] = ObjectIdGetDatum(oid);
+		values[Anum_pg_dict_dicttypid - 1] = ObjectIdGetDatum(dictTypeOid);
+		values[Anum_pg_dict_dictentry - 1] = NameGetDatum(&dictentry);
+
+		tup = heap_form_tuple(RelationGetDescr(pg_dict), values, nulls);
+
+		CatalogTupleInsert(pg_dict, tup);
+		heap_freetuple(tup);
+	}
+
+	/* clean up */
+	table_close(pg_dict, RowExclusiveLock);
+}
+
+/*
+ * Returns all the entries for the dictionary with given Oid. Entries are sorted
+ * by dictentry. Note that shorter entries are considered smaller, i.e. 'abc'
+ * goes before 'abcdef'. The memory is allocated in a child memory context of
+ * the caller's memory context. It can be freed with DictEntriesFree().
+ *
+ * If there are no entries a valid but empty dictionary is returned.
+ */
+Dictionary
+DictEntriesRead(Oid dictTypeOid)
+{
+	Relation	pg_dict;
+	ScanKeyData key[1];
+	SysScanDesc scan;
+	HeapTuple	tup;
+	uint32      entries_allocated;
+	Dictionary  dict;
+	MemoryContext myctx, oldctx;
+
+	myctx = AllocSetContextCreate(CurrentMemoryContext, "DictEntriesCtx", ALLOCSET_DEFAULT_SIZES);
+	oldctx = MemoryContextSwitchTo(myctx);
+
+	dict = (Dictionary)palloc(sizeof(DictionaryData));
+
+	entries_allocated = 8;
+	dict->nentries = 0;
+	dict->entries = (DictEntry*)palloc(entries_allocated*sizeof(DictEntry));
+
+	pg_dict = table_open(DictRelationId, RowExclusiveLock);
+
+	ScanKeyInit(&key[0],
+				Anum_pg_dict_dicttypid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(dictTypeOid));
+
+	scan = systable_beginscan(pg_dict, DictTypIdEntryIndexId, true,
+							  NULL, 1, key);
+
+	while (HeapTupleIsValid(tup = systable_getnext(scan)))
+	{
+		if(dict->nentries == entries_allocated)
+		{
+			entries_allocated = entries_allocated * 2;
+			dict->entries = (DictEntry*)repalloc(dict->entries, entries_allocated*sizeof(DictEntry));
+		}
+
+		dict->entries[dict->nentries].oid = ((Form_pg_dict) GETSTRUCT(tup))->oid;
+		/*
+		 * This is arguably not the fastest way to determine the length of
+		 * the entry. Alternatively, we could store a precalculated value in
+		 * the catalog. However, usually it's a good idea to keep things simple
+		 * until somebody discovers a bottleneck in this exact place and
+		 * proposes a concrete fix.
+		 */
+		dict->entries[dict->nentries].length = (uint32)strlen(((Form_pg_dict) GETSTRUCT(tup))->dictentry.data);
+		dict->entries[dict->nentries].data = palloc(dict->entries[dict->nentries].length);
+		memcpy(dict->entries[dict->nentries].data, ((Form_pg_dict) GETSTRUCT(tup))->dictentry.data, dict->entries[dict->nentries].length);
+		dict->nentries++;
+	}
+
+	systable_endscan(scan);
+	table_close(pg_dict, RowExclusiveLock);
+
+	MemoryContextSwitchTo(oldctx);
+
+	return dict;
+}
+
+/*
+ * Frees the memory allocated for the dictionary.
+ */
+void
+DictEntriesFree(Dictionary dict)
+{
+	MemoryContextDelete(GetMemoryChunkContext(dict));
+}
+
+/*
+ * Deletes all the entries for the dictionary with given Oid.
+ */
+void
+DictEntriesDelete(Oid dictTypeOid)
+{
+	Relation	pg_dict;
+	ScanKeyData key[1];
+	SysScanDesc scan;
+	HeapTuple	tup;
+
+	pg_dict = table_open(DictRelationId, RowExclusiveLock);
+
+	ScanKeyInit(&key[0],
+				Anum_pg_dict_dicttypid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(dictTypeOid));
+
+	scan = systable_beginscan(pg_dict, DictTypIdEntryIndexId, true,
+							  NULL, 1, key);
+
+	while (HeapTupleIsValid(tup = systable_getnext(scan)))
+	{
+		CatalogTupleDelete(pg_dict, &tup->t_self);
+	}
+
+	systable_endscan(scan);
+	table_close(pg_dict, RowExclusiveLock);
+}
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index b7c3dded17..e0d82416ea 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -46,6 +46,7 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_depend.h"
+#include "catalog/pg_dict.h"
 #include "catalog/pg_enum.h"
 #include "catalog/pg_language.h"
 #include "catalog/pg_namespace.h"
@@ -679,6 +680,14 @@ RemoveTypeById(Oid typeOid)
 	if (((Form_pg_type) GETSTRUCT(tup))->typtype == TYPTYPE_RANGE)
 		RangeDelete(typeOid);
 
+	/*
+	 * If it is a dictionary type, delete the pg_dict entries too; we don't
+	 * bother with making a dependency entry for that, so it has to be done
+	 * "by hand" here.
+	 */
+	if (((Form_pg_type) GETSTRUCT(tup))->typtype == TYPTYPE_DICT)
+		DictEntriesDelete(typeOid);
+
 	ReleaseSysCache(tup);
 
 	table_close(relation, RowExclusiveLock);
@@ -763,8 +772,8 @@ DefineDomain(CreateDomainStmt *stmt)
 
 	/*
 	 * Base type must be a plain base type, a composite type, another domain,
-	 * an enum or a range type.  Domains over pseudotypes would create a
-	 * security hole.  (It would be shorter to code this to just check for
+	 * a dict, an enum or a range type.  Domains over pseudotypes would create
+	 * a security hole.  (It would be shorter to code this to just check for
 	 * pseudotypes; but it seems safer to call out the specific typtypes that
 	 * are supported, rather than assume that all future typtypes would be
 	 * automatically supported.)
@@ -772,6 +781,7 @@ DefineDomain(CreateDomainStmt *stmt)
 	typtype = baseType->typtype;
 	if (typtype != TYPTYPE_BASE &&
 		typtype != TYPTYPE_COMPOSITE &&
+		typtype != TYPTYPE_DICT &&
 		typtype != TYPTYPE_DOMAIN &&
 		typtype != TYPTYPE_ENUM &&
 		typtype != TYPTYPE_RANGE &&
@@ -1129,6 +1139,163 @@ DefineDomain(CreateDomainStmt *stmt)
 }
 
 
+/*
+ * DefineDictionary
+ *		Registers a new dictionary.
+ */
+ObjectAddress
+DefineDictionary(CreateDictionaryStmt * stmt)
+{
+	char	   *dictName;
+	char	   *dictBaseTypeName;
+	char	   *dictArrayName;
+	Oid			dictNamespace;
+	AclResult	aclresult;
+	Oid			old_type_oid;
+	Oid			dictArrayOid;
+	ObjectAddress dictTypeAddr;
+
+	Assert(list_length(stmt->baseTypeName) == 1);
+	dictBaseTypeName = strVal(linitial(stmt->baseTypeName));
+	if (pg_strcasecmp(dictBaseTypeName, "jsonb") != 0)
+		ereport(ERROR,
+				(
+				 errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("Dictionary types currently can be used only with JSONB")
+				 )
+			);
+
+	/* Convert list of names to a name and namespace */
+	dictNamespace = QualifiedNameGetCreationNamespace(stmt->typeName,
+													  &dictName);
+
+	/* Check we have creation rights in target namespace */
+	aclresult = pg_namespace_aclcheck(dictNamespace, GetUserId(), ACL_CREATE);
+	if (aclresult != ACLCHECK_OK)
+		aclcheck_error(aclresult, OBJECT_SCHEMA,
+					   get_namespace_name(dictNamespace));
+
+	/*
+	 * Check for collision with an existing type name.  If there is one and
+	 * it's an autogenerated array, we can rename it out of the way.
+	 */
+	old_type_oid = GetSysCacheOid2(TYPENAMENSP, Anum_pg_type_oid,
+								   CStringGetDatum(dictName),
+								   ObjectIdGetDatum(dictNamespace));
+	if (OidIsValid(old_type_oid))
+	{
+		if (!moveArrayTypeName(old_type_oid, dictName, dictNamespace))
+			ereport(ERROR,
+					(errcode(ERRCODE_DUPLICATE_OBJECT),
+					 errmsg("type \"%s\" already exists", dictName)));
+	}
+
+	/* Allocate OID for array type */
+	dictArrayOid = AssignTypeArrayOid();
+
+	/* Create the pg_type entry */
+	dictTypeAddr =
+		TypeCreate(InvalidOid,	/* no predetermined type OID */
+				   dictName,	/* type name */
+				   dictNamespace,	/* namespace */
+				   InvalidOid,	/* relation oid (n/a here) */
+				   0,			/* relation kind (ditto) */
+				   GetUserId(), /* owner's ID */
+				   -1,			/* internal size: varlena, as for JSONB */
+				   TYPTYPE_DICT,	/* type-type: dictionary */
+				   TYPCATEGORY_USER,	/* type-category: user, as for JSONB */
+				   false,		/* dict types are never preferred */
+				   DEFAULT_TYPDELIM,	/* array element delimiter */
+				   F_DICTIONARY_IN, /* input procedure */
+				   F_DICTIONARY_OUT,	/* output procedure */
+				   InvalidOid,	/* receive procedure: none */
+				   InvalidOid,	/* send procedure: none */
+				   InvalidOid,	/* typmodin procedure - none */
+				   InvalidOid,	/* typmodout procedure - none */
+				   InvalidOid,	/* analyze procedure - default */
+				   InvalidOid,	/* subscript procedure - none */
+				   InvalidOid,	/* element type ID */
+				   false,		/* this is not an array type */
+				   dictArrayOid,	/* array type we are about to create */
+				   InvalidOid,	/* base type ID (only for domains) */
+				   NULL,		/* never a default type value */
+				   NULL,		/* binary default isn't sent either */
+				   false,		/* passed by value: same as for JSONB */
+				   TYPALIGN_INT,	/* int alignment */
+				   TYPSTORAGE_EXTENDED, /* TOAST strategy: fully toastable, as
+										 * JSONB */
+				   -1,			/* typMod (Domains only) */
+				   0,			/* Array dimensions of typbasetype */
+				   false,		/* Type NOT NULL */
+				   InvalidOid); /* type's collation */
+
+	/*
+	 * Create the array type that goes with it.
+	 */
+	dictArrayName = makeArrayTypeName(dictName, dictNamespace);
+
+	TypeCreate(dictArrayOid,	/* force assignment of this type OID */
+			   dictArrayName,	/* type name */
+			   dictNamespace,	/* namespace */
+			   InvalidOid,		/* relation oid (n/a here) */
+			   0,				/* relation kind (ditto) */
+			   GetUserId(),		/* owner's ID */
+			   -1,				/* internal size (always varlena) */
+			   TYPTYPE_BASE,	/* type-type (base type) */
+			   TYPCATEGORY_ARRAY,	/* type-category (array) */
+			   false,			/* array types are never preferred */
+			   DEFAULT_TYPDELIM,	/* array element delimiter */
+			   F_ARRAY_IN,		/* input procedure */
+			   F_ARRAY_OUT,		/* output procedure */
+			   F_ARRAY_RECV,	/* receive procedure */
+			   F_ARRAY_SEND,	/* send procedure */
+			   InvalidOid,		/* typmodin procedure - none */
+			   InvalidOid,		/* typmodout procedure - none */
+			   F_ARRAY_TYPANALYZE,	/* analyze procedure */
+			   F_ARRAY_SUBSCRIPT_HANDLER,	/* array subscript procedure */
+			   dictTypeAddr.objectId,	/* element type ID */
+			   true,			/* yes this is an array type */
+			   InvalidOid,		/* no further array type */
+			   InvalidOid,		/* base type ID */
+			   NULL,			/* never a default type value */
+			   NULL,			/* binary default isn't sent either */
+			   false,			/* passed by value: same as for JSONB */
+			   TYPALIGN_INT,	/* enums have int align, so do their arrays */
+			   TYPSTORAGE_EXTENDED, /* ARRAY is always toastable */
+			   -1,				/* typMod (Domains only) */
+			   0,				/* Array dimensions of typbasetype */
+			   false,			/* Type NOT NULL */
+			   InvalidOid);		/* type's collation */
+
+	pfree(dictArrayName);
+
+	/* Enter the dict's entries into pg_dict */
+	DictEntriesCreate(dictTypeAddr.objectId, stmt->vals);
+
+	/* Create casts to and from JSONB */
+	CastCreate(JSONBOID, dictTypeAddr.objectId, F_JSONB_DICTIONARY,
+			   InvalidOid, InvalidOid,
+			   COERCION_CODE_ASSIGNMENT, COERCION_METHOD_FUNCTION,
+			   DEPENDENCY_INTERNAL);
+
+	CastCreate(dictTypeAddr.objectId, JSONBOID, F_DICTIONARY_JSONB,
+			   InvalidOid, InvalidOid,
+			   COERCION_METHOD_INOUT, COERCION_METHOD_FUNCTION,
+			   DEPENDENCY_INTERNAL);
+
+	/*
+	 * Create explicit cast to bytea. This is convenient for debugging
+	 * purposes. Casting bytea to a dictionary type is dangerous and thus not
+	 * supported.
+	 */
+	CastCreate(dictTypeAddr.objectId, BYTEAOID, F_DICTIONARY_BYTEA,
+			   InvalidOid, InvalidOid,
+			   COERCION_CODE_EXPLICIT, COERCION_METHOD_FUNCTION,
+			   DEPENDENCY_INTERNAL);
+
+	return dictTypeAddr;
+}
+
 /*
  * DefineEnum
  *		Registers a new enum.
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index e134a82ff7..9674cdfe4d 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -1721,6 +1721,7 @@ check_sql_fn_retval(List *queryTreeLists,
 
 	if (fn_typtype == TYPTYPE_BASE ||
 		fn_typtype == TYPTYPE_DOMAIN ||
+		fn_typtype == TYPTYPE_DICT ||
 		fn_typtype == TYPTYPE_ENUM ||
 		fn_typtype == TYPTYPE_RANGE ||
 		fn_typtype == TYPTYPE_MULTIRANGE)
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 6ca23f88c4..4aec75e55f 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -6153,6 +6153,14 @@ DefineStmt:
 					n->coldeflist = $6;
 					$$ = (Node *) n;
 				}
+			| CREATE TYPE_P any_name AS DICTIONARY OF any_name '(' opt_enum_val_list ')'
+				{
+					CreateDictionaryStmt *n = makeNode(CreateDictionaryStmt);
+					n->typeName = $3;
+					n->baseTypeName = $7;
+					n->vals = $9;
+					$$ = (Node *)n;
+				}
 			| CREATE TYPE_P any_name AS ENUM_P '(' opt_enum_val_list ')'
 				{
 					CreateEnumStmt *n = makeNode(CreateEnumStmt);
diff --git a/src/backend/parser/parse_coerce.c b/src/backend/parser/parse_coerce.c
index 60908111c8..8952e1469c 100644
--- a/src/backend/parser/parse_coerce.c
+++ b/src/backend/parser/parse_coerce.c
@@ -279,7 +279,22 @@ coerce_type(ParseState *pstate, Node *node,
 		if (baseTypeId == INTERVALOID)
 			inputTypeMod = baseTypeMod;
 		else
-			inputTypeMod = -1;
+		{
+			if((int32)targetTypeId == targetTypeMod)
+			{
+				/*
+				 * For dictionaries internally we set typmod to a dictionary
+				 * OID. This allows dictionary_in() and jsonb_dictionary() to
+				 * distinguish one dictionary from another.
+				 *
+				 * It also allows us to recognize that the target type is
+				 * a dictionary here and pass a correct typmod.
+				 */
+				inputTypeMod = targetTypeMod;
+			}
+			else
+				inputTypeMod = -1;
+		}
 
 		baseType = typeidType(baseTypeId);
 
diff --git a/src/backend/parser/parse_type.c b/src/backend/parser/parse_type.c
index f7ad689459..2188002535 100644
--- a/src/backend/parser/parse_type.c
+++ b/src/backend/parser/parse_type.c
@@ -259,6 +259,10 @@ LookupTypeNameOid(ParseState *pstate, const TypeName *typeName, bool missing_ok)
  * This is equivalent to LookupTypeName, except that this will report
  * a suitable error message if the type cannot be found or is not defined.
  * Callers of this can therefore assume the result is a fully valid type.
+ *
+ * For a dictionary type its OID is returned as a typmod. This is used by
+ * dictionary_in() and jsonb_dictionary() to distinguish one dictionary from
+ * another.
  */
 Type
 typenameType(ParseState *pstate, const TypeName *typeName, int32 *typmod_p)
@@ -305,6 +309,10 @@ typenameTypeId(ParseState *pstate, const TypeName *typeName)
  *
  * This is equivalent to typenameType, but we only hand back the type OID
  * and typmod, not the syscache entry.
+ *
+ * For a dictionary type its OID is returned as a typmod. This is used by
+ * dictionary_in() and jsonb_dictionary() to distinguish one dictionary from
+ * another.
  */
 void
 typenameTypeIdAndMod(ParseState *pstate, const TypeName *typeName,
@@ -327,6 +335,10 @@ typenameTypeIdAndMod(ParseState *pstate, const TypeName *typeName,
  * looked up, and is passed as "typ".
  *
  * pstate is only used for error location info, and may be NULL.
+ *
+ * For a dictionary type its OID is returned as a typmod. This is used by
+ * dictionary_in() and jsonb_dictionary() to distinguish one dictionary from
+ * another.
  */
 static int32
 typenameTypeMod(ParseState *pstate, const TypeName *typeName, Type typ)
@@ -339,6 +351,10 @@ typenameTypeMod(ParseState *pstate, const TypeName *typeName, Type typ)
 	ArrayType  *arrtypmod;
 	ParseCallbackState pcbstate;
 
+	/* For a dictionary return its OID. */
+	if (((Form_pg_type) GETSTRUCT(typ))->typtype == TYPTYPE_DICT)
+		return (int32)((Form_pg_type) GETSTRUCT(typ))->oid;
+
 	/* Return prespecified typmod if no typmod expressions */
 	if (typeName->typmods == NIL)
 		return typeName->typemod;
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 247d0816ad..ec8a70c3fc 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -174,6 +174,7 @@ ClassifyUtilityCommandAsReadOnly(Node *parsetree)
 		case T_CreateCastStmt:
 		case T_CreateConversionStmt:
 		case T_CreateDomainStmt:
+		case T_CreateDictionaryStmt:
 		case T_CreateEnumStmt:
 		case T_CreateEventTrigStmt:
 		case T_CreateExtensionStmt:
@@ -1621,6 +1622,10 @@ ProcessUtilitySlow(ParseState *pstate,
 				}
 				break;
 
+			case T_CreateDictionaryStmt:	/* CREATE TYPE AS DICTIONARY OF */
+				address = DefineDictionary((CreateDictionaryStmt *) parsetree);
+				break;
+
 			case T_CreateEnumStmt:	/* CREATE TYPE AS ENUM */
 				address = DefineEnum((CreateEnumStmt *) parsetree);
 				break;
@@ -2772,6 +2777,10 @@ CreateCommandTag(Node *parsetree)
 			tag = CMDTAG_CREATE_TYPE;
 			break;
 
+		case T_CreateDictionaryStmt:
+			tag = CMDTAG_CREATE_TYPE;
+			break;
+
 		case T_CreateEnumStmt:
 			tag = CMDTAG_CREATE_TYPE;
 			break;
@@ -3419,6 +3428,10 @@ GetCommandLogLevel(Node *parsetree)
 			lev = LOGSTMT_DDL;
 			break;
 
+		case T_CreateDictionaryStmt:
+			lev = LOGSTMT_DDL;
+			break;
+
 		case T_CreateEnumStmt:
 			lev = LOGSTMT_DDL;
 			break;
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index 0de0bbb1b8..58e25fd8bc 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -30,6 +30,7 @@ OBJS = \
 	datetime.o \
 	datum.o \
 	dbsize.o \
+	dictionaries.o \
 	domains.o \
 	encode.o \
 	enum.o \
diff --git a/src/backend/utils/adt/dictionaries.c b/src/backend/utils/adt/dictionaries.c
new file mode 100644
index 0000000000..2bb46471a4
--- /dev/null
+++ b/src/backend/utils/adt/dictionaries.c
@@ -0,0 +1,482 @@
+/*-------------------------------------------------------------------------
+ *
+ * dictionaries.c
+ *	  Conversion functions for dictionary types.
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/dictionaries.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "catalog/pg_dict.h"
+#include "utils/fmgrprotos.h"
+#include "utils/jsonb.h"
+
+/*
+ * When compressing data we treat it as a BLOB, in other words we don't
+ * assume anything regarding its internal representation. This is not
+ * necessarily the best and/or the only possible approach. However, it is
+ * universal and can be reused for JSONB, TEXT, XML and other types. Alternative
+ * compression methods can be introduced in the future, so that the user will
+ * be able to choose the best one for the task.
+ *
+ * The compressed data is stored in the following format:
+ *
+ * (struct varlena)
+ * dictionary_id     [uint32]
+ * decompressed_size [uint32]
+ * algorithm_version [uint8]
+ *
+ * (repeated) {
+ *   number of bytes to copy as-is     [uint8]
+ *   ... bytes to copy as-is ...
+ *   dictionary entry id, or 0 to skip [uint32]
+ * }
+ *
+ * Compressed data is a variable-length type and thus has the 'struct varlena'
+ * header. The code below doesn't consider it being a part of the payload
+ * (e.g. see the DICT_COMP_HEADER_SIZE definition).
+ *
+ * Storing dictionary_id may seem redundant, but without it dictionary_out()
+ * and dictionary_jsonb() have no way to distinguish one dictionary type from
+ * another. dictionary_in() and jsonb_dictionary() get the dictionary id through
+ * the 'typemod' argument.
+ *
+ * Currently algorithm_version is always 0. In the future it will allow us to
+ * introduce new features (e.g. usage of varints) and/or lazily migrate the
+ * data to other compression methods.
+ *
+ * Compression and decompression are implemented based on a binary search over
+ * DictEntry[] array (see pg_dict.h). For compression the array is sorted by
+ * dictentries and for decompression - by oid's.
+ *
+ */
+
+/* Size of the header in the compressed data */
+#define DICT_COMP_HEADER_SIZE (sizeof(uint32)*2 + sizeof(uint8))
+
+/* Extracts dictionary_id from the compressed data */
+#define DICT_COMP_DICTIONARY_ID(hdrp) \
+	(*(uint32*)hdrp)
+
+/* Extracts decompressed_size from the compressed data */
+#define DICT_COMP_DECOMPRESSED_SIZE(hdrp) \
+	(*(uint32*)((uint8*)hdrp + sizeof(uint32)))
+
+/* Extracts algorithm_version from the compressed data */
+#define DICT_COMP_ALGORITHM_VERSION(hdrp) \
+	(*(uint8*)((uint8*)hdrp + sizeof(uint32)*2))
+
+/* Current algorithm_version */
+#define DICT_COMP_CURRENT_ALGORITHM_VERSION 0
+
+/*
+ * bsearch_arg() callback for finding DictEntry by oid.
+ */
+static int
+find_by_oid_cb(const void *key, const void *current, void *arg)
+{
+	/* Note that oids are unsigned, so we should be careful here */
+	if (*(Oid *) key < ((DictEntry *) current)->oid)
+		return -1;
+	else if (*(Oid *) key == ((DictEntry *) current)->oid)
+		return 0;				/* found! */
+	else
+		return 1;
+}
+
+/*
+ * qsort() callback for sorting DictEntry[] by oids.
+ */
+static int
+sort_by_oids_cb(const void *left, const void *right)
+{
+	/* Note that oids are unsigned, so we should be careful here */
+	if (((DictEntry *) left)->oid < ((DictEntry *) right)->oid)
+		return -1;
+
+	/* Oids are unique so this callback will never return 0 */
+	return 1;
+}
+
+/*
+ * Finds a DictEntry whose dictentry field matches *data and returns its Oid.
+ * If there are several matching entries, the largest is returned. The length
+ * of the found entry is written to *found_length on success. On failure
+ * InvalidOid is returned and found_length is zeroed.
+ *
+ * The implementation is similar to bsearch_arg(). The procedure can't be used
+ * directly because we are looking not by the exact match. We could generalize
+ * this case but the signature of the function becomes so complicated that it
+ * doesn't seem to be worth the effort.
+ */
+static Oid
+compress_find_oid(Dictionary dict, const uint8 *data, Size data_size, Size *found_length)
+{
+	int			res;
+	int32		left = 0;
+	int32		right = dict->nentries - 1;
+	Size		best_length = 0;
+	Oid			best_match = InvalidOid;
+
+	while (left <= right)
+	{
+		int32		current = (left + right) / 2;
+		Size		nbytes = (Size)dict->entries[current].length;
+
+		if (nbytes > data_size)
+		{
+			/* current can be less or greater depending on the prefix */
+			res = memcmp(dict->entries[current].data, data, data_size);
+
+			/* if prefixes match, current is greater */
+			if (res == 0)
+				res = 1;
+		}
+		else
+			res = memcmp(dict->entries[current].data, data, nbytes);
+
+		if (res == 0)			/* match found */
+		{
+			best_length = nbytes;
+			best_match = dict->entries[current].oid;
+
+			if (nbytes == data_size)
+				break;
+
+			/* maybe there is a larger match */
+			left = current + 1;
+		}
+		else if (res < 0)		/* current is less */
+			left = current + 1;
+		else					/* current is greater */
+			right = current - 1;
+	}
+
+	*found_length = best_length;
+	return best_match;
+}
+
+/*
+ * Finds a DictEntry by Oid using a binary search. The dictionary should be
+ * sorted by oids before the call. Returns NULL if nothing was found.
+ */
+static DictEntry *
+decompress_find_dictentry(Dictionary dict, Oid oid)
+{
+	return (DictEntry *) bsearch_arg(&oid, dict->entries, dict->nentries, sizeof(DictEntry), find_by_oid_cb, NULL);
+}
+
+/*
+ * Estimates the worst-case compressed size of the data of given size.
+ * Worst-case scenario happens when the dictionary consists of single-character
+ * entries. In this case every byte will be encoded as 6 bytes:
+ *     0x00, (0 bytes to copy as-is), 4 bytes of the entry Oid
+ *
+ * This procedure doesn't account for the header size.
+ *
+ * AALEKSEEV FIXME don't use dictionary entries shorter than 5 bytes
+ */
+static Size
+worst_case_compressed_size(Size insize)
+{
+	return insize * 6;
+}
+
+/*
+ * Compresses the data using the provided dictionary. The dictionary should
+ * be sorted by dictentries before the call. Output buffer should be at least
+ * worst_case_compressed_size(src_size) bytes in size.
+ */
+static void
+compress(Dictionary dict,
+		 const void *src_data_, Size src_size,
+		 void *encoded_data_, Size *pencoded_size)
+{
+	Size		nbytes;
+	Size		inoffset;
+	Size		outskipoffset = 0;
+	Size		outoffset = 1;
+	uint8		skipbytes = 0;
+	const uint8 *src_data = src_data_;
+	uint8	   *encoded_data = ((uint8 *) encoded_data_);
+
+	for (inoffset = 0; inoffset < src_size;)
+	{
+		Oid			code = compress_find_oid(dict, &(src_data[inoffset]),
+											 src_size - inoffset, &nbytes);
+
+		if (code == InvalidOid)
+		{
+			skipbytes++;
+			encoded_data[outoffset] = src_data[inoffset];
+			outoffset++;
+			inoffset++;
+
+			if (skipbytes == 255)
+			{
+				encoded_data[outskipoffset] = skipbytes;
+				encoded_data[outoffset++] = 0;	/* InvalidOid */
+				encoded_data[outoffset++] = 0;
+				encoded_data[outoffset++] = 0;
+				encoded_data[outoffset++] = 0;
+				outskipoffset = outoffset++;
+				skipbytes = 0;
+			}
+		}
+		else
+		{
+			encoded_data[outskipoffset] = skipbytes;
+			encoded_data[outoffset++] = (code >> 24) & 0xFF;
+			encoded_data[outoffset++] = (code >> 16) & 0xFF;
+			encoded_data[outoffset++] = (code >> 8) & 0xFF;
+			encoded_data[outoffset++] = code & 0xFF;
+			outskipoffset = outoffset++;
+			skipbytes = 0;
+			inoffset += nbytes;
+		}
+	}
+
+	/* Double check that we didn't write out of buffer */
+	Assert(outoffset < worst_case_compressed_size(src_size));
+
+	encoded_data[outskipoffset] = skipbytes;
+	*pencoded_size = outoffset;
+}
+
+/*
+ * Report an internal error in decompress() procedure below.
+ *
+ * Under normal circumstances this should never happen: reaching it means
+ * the compressed data is corrupted.  Does not return (ereport with ERROR
+ * performs a non-local exit).
+ */
+static void
+decompress_error(void)
+{
+	ereport(ERROR,
+			(
+			 errcode(ERRCODE_INTERNAL_ERROR),
+			 errmsg("could not decompress a dictionary type"),
+			 errdetail("The compressed data seems to be corrupted."),
+			 errhint("Please report the steps to reproduce the issue to pgsql-bugs@lists.postgresql.org.")
+			));
+}
+
+/*
+ * Decompresses the data using the provided dictionary. The dictionary should
+ * be sorted by oids before the call.
+ *
+ * The encoded stream is a sequence of [skip-count byte][that many literal
+ * bytes][4-byte big-endian entry Oid] groups; InvalidOid is a filler code
+ * carrying no entry data (see compress()).  Every offset is bounds-checked
+ * so corrupted input raises an error instead of overrunning either buffer.
+ */
+static void
+decompress(Dictionary dict,
+		   const void *encoded_data_, Size encoded_size,
+		   void *decoded_data_, Size decoded_size)
+{
+	Size		inoffset = 0;	/* read position in the encoded stream */
+	Size		outoffset = 0;	/* write position in the decoded buffer */
+	Oid			code;
+	uint8		skipbytes;
+	const uint8 *encoded_data = ((uint8 *) encoded_data_);
+	uint8	   *decoded_data = decoded_data_;
+
+	for (inoffset = 0; inoffset < encoded_size;)
+	{
+		/* Each group starts with the number of literal bytes to copy */
+		skipbytes = encoded_data[inoffset++];
+
+		/*
+		 * The checks below cannot wrap: outoffset <= decoded_size and
+		 * inoffset <= encoded_size hold on every iteration.
+		 */
+		if (skipbytes > decoded_size - outoffset)
+			decompress_error();
+
+		if (skipbytes > encoded_size - inoffset)
+			decompress_error();
+
+		/* Copy the literal run verbatim */
+		memcpy(
+			   &(decoded_data[outoffset]),
+			   &(encoded_data[inoffset]),
+			   skipbytes
+			);
+
+		outoffset += skipbytes;
+		inoffset += skipbytes;
+
+		if ((encoded_size == inoffset) && (decoded_size == outoffset))
+			break;				/* end of input - its OK */
+
+		/* A 4-byte entry Oid must follow */
+		if (encoded_size - inoffset < 4)
+			decompress_error();
+
+		/* Reassemble the big-endian Oid emitted by compress() */
+		code = (Oid) encoded_data[inoffset++];
+		code = (code << 8) | (Oid) encoded_data[inoffset++];
+		code = (code << 8) | (Oid) encoded_data[inoffset++];
+		code = (code << 8) | (Oid) encoded_data[inoffset++];
+
+		if (code != InvalidOid)
+		{
+			Size		entrylen;
+			DictEntry  *entry = decompress_find_dictentry(dict, code);
+
+			/* Unknown code: dictionary/data mismatch */
+			if (entry == NULL)
+				decompress_error();
+
+			Assert(entry->oid == code);
+			entrylen = (Size)entry->length;
+
+			if (entrylen > decoded_size - outoffset)
+				decompress_error();
+
+			/* Substitute the dictionary entry for its code */
+			memcpy(
+				   &(decoded_data[outoffset]),
+				   entry->data,
+				   entrylen
+				);
+
+			outoffset += entrylen;
+		}
+		/* code == InvalidOid is a filler emitted on skip-counter overflow */
+	}
+
+	Assert(decoded_size == outoffset);
+}
+
+/*
+ * Converts a cstring to a dictionary.
+ *
+ * Input function for dictionary types: parse the text as ordinary JSONB,
+ * then compress it with the dictionary identified by the typmod.
+ */
+Datum
+dictionary_in(PG_FUNCTION_ARGS)
+{
+	const char *instr = PG_GETARG_CSTRING(0);
+	int32		typmod = PG_GETARG_INT32(2);
+	Datum		jsonb_datum;
+
+	/* dictionary types always receive the dictionary Oid as typmod */
+	Assert(typmod != -1);
+
+	jsonb_datum = DirectFunctionCall1(jsonb_in, CStringGetDatum(instr));
+
+	PG_RETURN_DATUM(DirectFunctionCall2(jsonb_dictionary,
+										jsonb_datum,
+										Int32GetDatum(typmod)));
+}
+
+/*
+ * Converts a dictionary to a cstring.
+ *
+ * Output function for dictionary types: decompress back to JSONB, then
+ * let the stock jsonb_out render the text form.
+ */
+Datum
+dictionary_out(PG_FUNCTION_ARGS)
+{
+	Datum		compressed = PG_GETARG_DATUM(0);
+	Datum		jsonb_datum;
+
+	/* decompress into a plain Jsonb datum... */
+	jsonb_datum = DirectFunctionCall1(dictionary_jsonb, compressed);
+
+	/* ...and reuse the regular JSONB output function */
+	PG_RETURN_DATUM(DirectFunctionCall1(jsonb_out, jsonb_datum));
+}
+
+/*
+ * Converts JSONB to a dictionary type.
+ *
+ * The result is a varlena with a small header (dictionary Oid, decompressed
+ * size, algorithm version) followed by the compressed JSONB payload.
+ *
+ * AALEKSEEV FIXME: if the compressed document ends up being larger than
+ * the original one write a corresponding flag and copy all the data as-is.
+ */
+Datum
+jsonb_dictionary(PG_FUNCTION_ARGS)
+{
+	Dictionary	dict;
+	Jsonb	   *jsonb = PG_GETARG_JSONB_P(0);
+	int32		typmod = PG_GETARG_INT32(1);	/* the dictionary Oid */
+	uint8	   *jsonb_data = (uint8 *) VARDATA(jsonb);
+	Size		jsonb_data_size = VARSIZE(jsonb) - VARHDRSZ;
+	uint8	   *encoded_buff,
+			   *encoded_header,
+			   *encoded_data;
+	Size		encoded_size,
+				encoded_buff_size;
+
+	Assert(typmod != -1);
+
+	/*
+	 * NOTE(review): compress() documents that the dictionary should be
+	 * sorted by dictentries, yet sort_by_oids_cb is used here -- verify
+	 * against compress_find_oid() which ordering is actually required.
+	 */
+	dict = DictEntriesRead(typmod);
+	qsort((void *) dict->entries, dict->nentries, sizeof(DictEntry), sort_by_oids_cb);
+
+	/* Allocate for the worst case; the buffer is shrunk below */
+	encoded_buff_size = VARHDRSZ + DICT_COMP_HEADER_SIZE + worst_case_compressed_size(jsonb_data_size);
+	encoded_buff = palloc(encoded_buff_size);
+	encoded_header = (uint8 *) VARDATA(encoded_buff);
+	encoded_data = encoded_header + DICT_COMP_HEADER_SIZE;
+
+	/* Fill the header consumed by dictionary_jsonb() */
+	DICT_COMP_DICTIONARY_ID(encoded_header) = typmod;
+	DICT_COMP_DECOMPRESSED_SIZE(encoded_header) = jsonb_data_size;
+	DICT_COMP_ALGORITHM_VERSION(encoded_header) = DICT_COMP_CURRENT_ALGORITHM_VERSION;
+
+	encoded_size = encoded_buff_size - VARHDRSZ - DICT_COMP_HEADER_SIZE;
+
+	compress(dict, jsonb_data, jsonb_data_size,
+			 encoded_data, &encoded_size);
+
+	/* compress() reported the payload size; account for the headers */
+	encoded_size += VARHDRSZ + DICT_COMP_HEADER_SIZE;
+
+	/* Give back the unused tail of the worst-case allocation */
+	encoded_buff = repalloc(encoded_buff, encoded_size);
+	SET_VARSIZE(encoded_buff, encoded_size);
+
+	DictEntriesFree(dict);
+
+	PG_RETURN_BYTEA_P(encoded_buff);
+}
+
+/*
+ * Converts a dictionary type to JSONB.
+ *
+ * The input varlena carries a header (algorithm version, dictionary Oid,
+ * decompressed size) followed by the compressed payload, as produced by
+ * jsonb_dictionary().  The algorithm version is validated before any other
+ * header field is interpreted.
+ */
+Datum
+dictionary_jsonb(PG_FUNCTION_ARGS)
+{
+	bytea	   *encoded_buff = PG_GETARG_BYTEA_P(0);
+	uint8	   *encoded_header = (uint8 *) VARDATA(encoded_buff);
+	uint8	   *encoded_data = encoded_header + DICT_COMP_HEADER_SIZE;
+	Size		encoded_size = VARSIZE(encoded_buff) - VARHDRSZ - DICT_COMP_HEADER_SIZE;
+	int			alg_version = DICT_COMP_ALGORITHM_VERSION(encoded_header);
+	Oid			dictOid;		/* cannot read until algorithm version is
+								 * checked */
+	uint32		decoded_size;	/* cannot read until algorithm version is
+								 * checked */
+	Jsonb	   *jsonb;
+	uint8	   *jsonb_data;
+	Dictionary	dict;
+
+	if (alg_version > DICT_COMP_CURRENT_ALGORITHM_VERSION)
+		ereport(ERROR,
+				(
+				 errcode(ERRCODE_INTERNAL_ERROR),
+				 errmsg("unsupported compression algorithm version"),
+				 errdetail("Saved algorithm version is %d, current version is %d.",
+						   alg_version, DICT_COMP_CURRENT_ALGORITHM_VERSION),
+				 errhint("The data is either corrupted or imported from "
+						 "a future version of PostgreSQL.")
+				 ));
+
+	dictOid = DICT_COMP_DICTIONARY_ID(encoded_header);
+	decoded_size = DICT_COMP_DECOMPRESSED_SIZE(encoded_header);
+
+	dict = DictEntriesRead(dictOid);
+
+	/*
+	 * NOTE(review): decoded_size is taken straight from the stored header;
+	 * a corrupted value causes a huge palloc here rather than a clean
+	 * decompression error -- consider sanity-checking it first.
+	 */
+	jsonb = palloc(decoded_size + VARHDRSZ);
+	jsonb_data = (uint8 *) VARDATA(jsonb);
+
+	decompress(dict, encoded_data, encoded_size, jsonb_data, decoded_size);
+
+	decoded_size += VARHDRSZ;
+	SET_VARSIZE(jsonb, decoded_size);
+
+	DictEntriesFree(dict);
+
+	PG_RETURN_JSONB_P(jsonb);
+}
+
+/*
+ * Converts a dictionary type to bytea.
+ *
+ * A dictionary value is already a valid varlena byte string, so the
+ * (detoasted) input is returned as-is.
+ */
+Datum
+dictionary_bytea(PG_FUNCTION_ARGS)
+{
+	bytea	   *compressed_data = PG_GETARG_BYTEA_P(0);
+
+	PG_RETURN_BYTEA_P(compressed_data);
+}
diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build
index ed9ceadfef..2ed48c1406 100644
--- a/src/backend/utils/adt/meson.build
+++ b/src/backend/utils/adt/meson.build
@@ -17,6 +17,7 @@ backend_sources += files(
   'datetime.c',
   'datum.c',
   'dbsize.c',
+  'dictionaries.c',
   'domains.c',
   'encode.c',
   'enum.c',
diff --git a/src/backend/utils/fmgr/funcapi.c b/src/backend/utils/fmgr/funcapi.c
index 87cbb1d3e3..efb3e739e4 100644
--- a/src/backend/utils/fmgr/funcapi.c
+++ b/src/backend/utils/fmgr/funcapi.c
@@ -1326,6 +1326,7 @@ get_type_func_class(Oid typid, Oid *base_typeid)
 		case TYPTYPE_COMPOSITE:
 			return TYPEFUNC_COMPOSITE;
 		case TYPTYPE_BASE:
+		case TYPTYPE_DICT:
 		case TYPTYPE_ENUM:
 		case TYPTYPE_RANGE:
 		case TYPTYPE_MULTIRANGE:
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index da427f4d4a..38638b88cc 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -10218,6 +10218,8 @@ dumpType(Archive *fout, const TypeInfo *tyinfo)
 		dumpDomain(fout, tyinfo);
 	else if (tyinfo->typtype == TYPTYPE_COMPOSITE)
 		dumpCompositeType(fout, tyinfo);
+	else if (tyinfo->typtype == TYPTYPE_DICT)
+		pg_log_error("AALEKSEEV TODO FIXME not implemented");
 	else if (tyinfo->typtype == TYPTYPE_ENUM)
 		dumpEnumType(fout, tyinfo);
 	else if (tyinfo->typtype == TYPTYPE_RANGE)
diff --git a/src/include/catalog/meson.build b/src/include/catalog/meson.build
index 45ffa99692..8d76de46f3 100644
--- a/src/include/catalog/meson.build
+++ b/src/include/catalog/meson.build
@@ -25,6 +25,7 @@ catalog_headers = [
   'pg_event_trigger.h',
   'pg_description.h',
   'pg_cast.h',
+  'pg_dict.h',
   'pg_enum.h',
   'pg_namespace.h',
   'pg_conversion.h',
diff --git a/src/include/catalog/pg_dict.h b/src/include/catalog/pg_dict.h
new file mode 100644
index 0000000000..fbad803468
--- /dev/null
+++ b/src/include/catalog/pg_dict.h
@@ -0,0 +1,78 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_dict.h
+ *	  definition of the "dict" system catalog (pg_dict)
+ *
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * src/include/catalog/pg_dict.h
+ *
+ * NOTES
+ *	  The Catalog.pm module reads this file and derives schema
+ *	  information.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_DICT_H
+#define PG_DICT_H
+
+#include "catalog/genbki.h"
+#include "catalog/pg_dict_d.h"
+
+#include "nodes/pg_list.h"
+
+/* ----------------
+ *		pg_dict definition.  cpp turns this into
+ *		typedef struct FormData_pg_dict
+ * ----------------
+ */
+CATALOG(pg_dict,9861,DictRelationId)
+{
+	Oid			oid;			/* oid */
+	Oid			dicttypid BKI_LOOKUP(pg_type);	/* OID of owning dict type */
+	/* AALEKSEEV TODO refactor to bytea (NameData caps entry length) */
+	NameData	dictentry;		/* text representation of the dictionary entry */
+} FormData_pg_dict;
+
+/* ----------------
+ *		Form_pg_dict corresponds to a pointer to a tuple with
+ *		the format of pg_dict relation.
+ * ----------------
+ */
+typedef FormData_pg_dict *Form_pg_dict;
+
+DECLARE_UNIQUE_INDEX_PKEY(pg_dict_oid_index, 9862, DictOidIndexId, on pg_dict using btree(oid oid_ops));
+DECLARE_UNIQUE_INDEX(pg_dict_typid_entry_index, 9863, DictTypIdEntryIndexId, on pg_dict using btree(dicttypid oid_ops, dictentry name_ops));
+
+
+/*
+ * DictEntry type represents one entry in the given dictionary.
+ */
+typedef struct
+{
+	Oid         oid;    /* entry id: pg_dict row OID, used as compression code */
+	uint32      length; /* entry length in bytes */
+	uint8*      data;   /* entry data ('length' bytes) */
+} DictEntry;
+
+/*
+ * Dictionary type represents all the entries in the given dictionary,
+ * as loaded from pg_dict by DictEntriesRead().
+ */
+typedef struct
+{
+	uint32      nentries; /* number of entries */
+	DictEntry  *entries; /* array of 'nentries' entries */
+} DictionaryData;
+
+typedef DictionaryData *Dictionary;
+
+/*
+ * prototypes for functions in pg_dict.c
+ */
+extern void DictEntriesCreate(Oid dictTypeOid, List *vals);
+extern void DictEntriesFree(Dictionary dict);
+extern Dictionary DictEntriesRead(Oid dictTypeOid);
+extern void DictEntriesDelete(Oid dictTypeOid);
+
+#endif							/* PG_DICT_H */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 20f5aa56ea..5c31bb58a3 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -9966,6 +9966,26 @@
   proname => 'jsonb_path_match_opr', prorettype => 'bool',
   proargtypes => 'jsonb jsonpath', prosrc => 'jsonb_path_match_opr' },
 
+# dictionaries
+{ oid => '9864', descr => 'Converts a cstring to a dictionary',
+  proname => 'dictionary_in', prorettype => 'any', proargtypes => 'cstring oid int4',
+  prosrc => 'dictionary_in' },
+{ oid => '9865', descr => 'Converts a dictionary to a cstring',
+  proname => 'dictionary_out', prorettype => 'cstring', proargtypes => 'any',
+  prosrc => 'dictionary_out' },
+{ oid => '9866', descr => 'Converts JSONB to a dictionary type',
+  proname => 'jsonb_dictionary', proisstrict => 'f', provolatile => 's',
+  prorettype => 'any', proargtypes => 'jsonb int4',
+  prosrc => 'jsonb_dictionary' },
+{ oid => '9867', descr => 'Converts a dictionary type to JSONB',
+  proname => 'dictionary_jsonb', proisstrict => 'f', provolatile => 's',
+  prorettype => 'jsonb', proargtypes => 'any',
+  prosrc => 'dictionary_jsonb' },
+{ oid => '9868', descr => 'Converts a dictionary type to a byte array',
+  proname => 'dictionary_bytea', proisstrict => 'f', provolatile => 's',
+  prorettype => 'bytea', proargtypes => 'any',
+  prosrc => 'dictionary_bytea' },
+
 # historical int8/txid_snapshot variants of xid8 functions
 { oid => '2939', descr => 'I/O',
   proname => 'txid_snapshot_in', prorettype => 'txid_snapshot',
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h
index 48a2559137..91c6bfae99 100644
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -272,6 +272,7 @@ DECLARE_UNIQUE_INDEX(pg_type_typname_nsp_index, 2704, TypeNameNspIndexId, on pg_
  */
 #define  TYPTYPE_BASE		'b' /* base type (ordinary scalar type) */
 #define  TYPTYPE_COMPOSITE	'c' /* composite (e.g., table's rowtype) */
+#define  TYPTYPE_DICT		'D' /* dictionary */
 #define  TYPTYPE_DOMAIN		'd' /* domain over another type */
 #define  TYPTYPE_ENUM		'e' /* enumerated type */
 #define  TYPTYPE_MULTIRANGE	'm' /* multirange type */
diff --git a/src/include/commands/typecmds.h b/src/include/commands/typecmds.h
index 532bc49c38..a3ef87741d 100644
--- a/src/include/commands/typecmds.h
+++ b/src/include/commands/typecmds.h
@@ -24,6 +24,7 @@
 extern ObjectAddress DefineType(ParseState *pstate, List *names, List *parameters);
 extern void RemoveTypeById(Oid typeOid);
 extern ObjectAddress DefineDomain(CreateDomainStmt *stmt);
+extern ObjectAddress DefineDictionary(CreateDictionaryStmt *stmt);
 extern ObjectAddress DefineEnum(CreateEnumStmt *stmt);
 extern ObjectAddress DefineRange(ParseState *pstate, CreateRangeStmt *stmt);
 extern ObjectAddress AlterEnum(AlterEnumStmt *stmt);
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 7caff62af7..8b127086c9 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -3296,6 +3296,18 @@ typedef struct CompositeTypeStmt
 	List	   *coldeflist;		/* list of ColumnDef nodes */
 } CompositeTypeStmt;
 
+/* ----------------------
+ *		Create Type Statement, dictionary types
+ * ----------------------
+ */
+typedef struct CreateDictionaryStmt
+{
+	NodeTag		type;
+	List	   *typeName;		/* qualified name (list of String) */
+	List	   *baseTypeName;	/* qualified base type name (list of String) */
+	List	   *vals;			/* dictionary entries (list of String) */
+} CreateDictionaryStmt;
+
 /* ----------------------
  *		Create Type Statement, enum types
  * ----------------------
diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c
index b286f2a50c..2754098f7f 100644
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@@ -2123,6 +2123,7 @@ build_datatype(HeapTuple typeTup, int32 typmod,
 	switch (typeStruct->typtype)
 	{
 		case TYPTYPE_BASE:
+		case TYPTYPE_DICT:
 		case TYPTYPE_ENUM:
 		case TYPTYPE_RANGE:
 		case TYPTYPE_MULTIRANGE:
diff --git a/src/test/regress/expected/dict.out b/src/test/regress/expected/dict.out
new file mode 100644
index 0000000000..699e76b654
--- /dev/null
+++ b/src/test/regress/expected/dict.out
@@ -0,0 +1,47 @@
+--
+-- Compression dictionaries tests
+--
+-- Dictionary types currently can be used only with JSONB
+\set ON_ERROR_STOP 0
+CREATE TYPE textdict AS DICTIONARY OF TEXT ('abcdef', 'ghijkl');
+ERROR:  Dictionary types currently can be used only with JSONB
+\set ON_ERROR_STOP 1
+-- Simple happy path
+CREATE TYPE mydict AS DICTIONARY OF JSONB ('abcdef', 'ghijkl');
+SELECT dictentry FROM pg_dict ORDER BY dictentry;
+ dictentry 
+-----------
+ abcdef
+ ghijkl
+(2 rows)
+
+SELECT '{"abcdef":"ghijkl"}' :: mydict;
+        mydict        
+----------------------
+ {"abcdef": "ghijkl"}
+(1 row)
+
+SELECT '{"abcdef":"ghijkl"}' :: mydict :: jsonb;
+        jsonb         
+----------------------
+ {"abcdef": "ghijkl"}
+(1 row)
+
+SELECT '{"abcdef":"ghijkl"}' :: jsonb :: mydict;
+        mydict        
+----------------------
+ {"abcdef": "ghijkl"}
+(1 row)
+
+SELECT ('{"abcdef":"ghijkl"}' :: mydict) -> 'abcdef';
+ ?column? 
+----------
+ "ghijkl"
+(1 row)
+
+DROP TYPE mydict;
+SELECT dictentry FROM pg_dict ORDER BY dictentry;
+ dictentry 
+-----------
+(0 rows)
+
diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out
index 215eb899be..91bc491a6c 100644
--- a/src/test/regress/expected/oidjoins.out
+++ b/src/test/regress/expected/oidjoins.out
@@ -182,6 +182,7 @@ NOTICE:  checking pg_description {classoid} => pg_class {oid}
 NOTICE:  checking pg_cast {castsource} => pg_type {oid}
 NOTICE:  checking pg_cast {casttarget} => pg_type {oid}
 NOTICE:  checking pg_cast {castfunc} => pg_proc {oid}
+NOTICE:  checking pg_dict {dicttypid} => pg_type {oid}
 NOTICE:  checking pg_enum {enumtypid} => pg_type {oid}
 NOTICE:  checking pg_namespace {nspowner} => pg_authid {oid}
 NOTICE:  checking pg_conversion {connamespace} => pg_namespace {oid}
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 330eb0f765..f435feb108 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -388,11 +388,12 @@ WHERE 'cstring'::regtype = ANY (p1.proargtypes)
     AND NOT EXISTS(SELECT 1 FROM pg_conversion WHERE conproc = p1.oid)
     AND p1.oid != 'shell_in(cstring)'::regprocedure
 ORDER BY 1;
- oid  |   proname    
-------+--------------
+ oid  |    proname    
+------+---------------
  2293 | cstring_out
  2501 | cstring_send
-(2 rows)
+ 9864 | dictionary_in
+(3 rows)
 
 -- Likewise, look for functions that return cstring and aren't datatype output
 -- functions nor typmod output functions.
@@ -405,11 +406,12 @@ WHERE  p1.prorettype = 'cstring'::regtype
     AND NOT EXISTS(SELECT 1 FROM pg_type WHERE typmodout = p1.oid)
     AND p1.oid != 'shell_out(void)'::regprocedure
 ORDER BY 1;
- oid  |   proname    
-------+--------------
+ oid  |    proname     
+------+----------------
  2292 | cstring_in
  2500 | cstring_recv
-(2 rows)
+ 9865 | dictionary_out
+(3 rows)
 
 -- Check for length inconsistencies between the various argument-info arrays.
 SELECT p1.oid, p1.proname
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 9a139f1e24..385398545b 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -111,7 +111,7 @@ test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combo
 # ----------
 # Another group of parallel tests (JSON related)
 # ----------
-test: json jsonb json_encoding jsonpath jsonpath_encoding jsonb_jsonpath
+test: dict json jsonb json_encoding jsonpath jsonpath_encoding jsonb_jsonpath
 
 # ----------
 # Another group of parallel tests
diff --git a/src/test/regress/sql/dict.sql b/src/test/regress/sql/dict.sql
new file mode 100644
index 0000000000..27ecd6a451
--- /dev/null
+++ b/src/test/regress/sql/dict.sql
@@ -0,0 +1,20 @@
+--
+-- Compression dictionaries tests
+--
+
+-- Dictionary types currently can be used only with JSONB
+\set ON_ERROR_STOP 0
+CREATE TYPE textdict AS DICTIONARY OF TEXT ('abcdef', 'ghijkl');
+\set ON_ERROR_STOP 1
+
+-- Simple happy path
+CREATE TYPE mydict AS DICTIONARY OF JSONB ('abcdef', 'ghijkl');
+SELECT dictentry FROM pg_dict ORDER BY dictentry;
+
+SELECT '{"abcdef":"ghijkl"}' :: mydict;
+SELECT '{"abcdef":"ghijkl"}' :: mydict :: jsonb;
+SELECT '{"abcdef":"ghijkl"}' :: jsonb :: mydict;
+SELECT ('{"abcdef":"ghijkl"}' :: mydict) -> 'abcdef';
+
+DROP TYPE mydict;
+SELECT dictentry FROM pg_dict ORDER BY dictentry;
-- 
2.38.1

