From d7e35cfaa7efef534bd81ab21590e243db1aa685 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdokimov@tantorlabs.ru>
Date: Fri, 4 Jul 2025 11:24:56 +0300
Subject: [PATCH v11] Expose cache hit statistics in Memoize node EXPLAIN
 output.

This patch adds additional information to the Memoize node's EXPLAIN output:
estimated cache capacity, number of distinct keys, total lookups, and
cache hit percentage.
---
 src/backend/commands/explain.c          | 20 ++++++++++++++++++++
 src/backend/optimizer/path/costsize.c   | 14 ++++++++++----
 src/backend/optimizer/plan/createplan.c | 13 ++++++++++---
 src/backend/optimizer/util/pathnode.c   | 11 ++++++++---
 src/include/nodes/pathnodes.h           |  4 +++-
 src/include/nodes/plannodes.h           | 10 ++++++++++
 src/include/optimizer/pathnode.h        |  2 +-
 7 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 7e2792ead71..c3c52fbe190 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3616,6 +3616,26 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es)
 	ExplainPropertyText("Cache Key", keystr.data, es);
 	ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es);
 
+	if (es->costs)
+	{
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+		{
+			ExplainIndentText(es);
+			appendStringInfo(es->str, "Estimates: capacity=%u distinct keys=%.0f lookups=%.0f hit percent=%.2f%%\n",
+							 ((Memoize *) plan)->est_entries,
+							 ((Memoize *) plan)->est_unique_keys,
+							 ((Memoize *) plan)->est_calls,
+							 ((Memoize *) plan)->est_hit_ratio * 100.0);
+		}
+		else
+		{
+			ExplainPropertyUInteger("Estimated Capacity", NULL, ((Memoize *) plan)->est_entries, es);
+			ExplainPropertyFloat("Estimated Distinct Lookup Keys", NULL, ((Memoize *) plan)->est_unique_keys, 0, es);
+			ExplainPropertyFloat("Estimated Lookups", NULL, ((Memoize *) plan)->est_calls, 0, es);
+			ExplainPropertyFloat("Estimated Hit Percent", NULL, ((Memoize *) plan)->est_hit_ratio * 100.0, 2, es);
+		}
+	}
+
 	pfree(keystr.data);
 
 	if (!es->analyze)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1f04a2c182c..3a60c55803c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2572,7 +2572,7 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
 	Cost		input_startup_cost = mpath->subpath->startup_cost;
 	Cost		input_total_cost = mpath->subpath->total_cost;
 	double		tuples = mpath->subpath->rows;
-	double		calls = mpath->calls;
+	double		est_calls = mpath->est_calls;
 	int			width = mpath->subpath->pathtarget->width;
 
 	double		hash_mem_bytes;
@@ -2604,7 +2604,7 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
 	est_cache_entries = floor(hash_mem_bytes / est_entry_bytes);
 
 	/* estimate on the distinct number of parameter values */
-	ndistinct = estimate_num_groups(root, mpath->param_exprs, calls, NULL,
+	ndistinct = estimate_num_groups(root, mpath->param_exprs, est_calls, NULL,
 									&estinfo);
 
 	/*
@@ -2616,7 +2616,7 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
 	 * certainly mean a MemoizePath will never survive add_path().
 	 */
 	if ((estinfo.flags & SELFLAG_USED_DEFAULT) != 0)
-		ndistinct = calls;
+		ndistinct = est_calls;
 
 	/*
 	 * Since we've already estimated the maximum number of entries we can
@@ -2630,6 +2630,9 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
 	mpath->est_entries = Min(Min(ndistinct, est_cache_entries),
 							 PG_UINT32_MAX);
 
+	/* Remember the ndistinct estimate for EXPLAIN */
+	mpath->est_unique_keys = ndistinct;
+
 	/*
 	 * When the number of distinct parameter values is above the amount we can
 	 * store in the cache, then we'll have to evict some entries from the
@@ -2644,9 +2647,12 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
 	 * must look at how many scans are estimated in total for this node and
 	 * how many of those scans we expect to get a cache hit.
 	 */
-	hit_ratio = ((calls - ndistinct) / calls) *
+	hit_ratio = ((est_calls - ndistinct) / est_calls) *
 		(est_cache_entries / Max(ndistinct, est_cache_entries));
 
+	/* Remember the hit ratio estimate for EXPLAIN */
+	mpath->est_hit_ratio = hit_ratio;
+
 	Assert(hit_ratio >= 0 && hit_ratio <= 1.0);
 
 	/*
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8a9f1d7a943..331b0b66a69 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -284,7 +284,9 @@ static Material *make_material(Plan *lefttree);
 static Memoize *make_memoize(Plan *lefttree, Oid *hashoperators,
 							 Oid *collations, List *param_exprs,
 							 bool singlerow, bool binary_mode,
-							 uint32 est_entries, Bitmapset *keyparamids);
+							 uint32 est_entries, Bitmapset *keyparamids,
+							 double est_calls, double est_unique_keys,
+							 double est_hit_ratio);
 static WindowAgg *make_windowagg(List *tlist, WindowClause *wc,
 								 int partNumCols, AttrNumber *partColIdx, Oid *partOperators, Oid *partCollations,
 								 int ordNumCols, AttrNumber *ordColIdx, Oid *ordOperators, Oid *ordCollations,
@@ -1753,7 +1755,8 @@ create_memoize_plan(PlannerInfo *root, MemoizePath *best_path, int flags)
 
 	plan = make_memoize(subplan, operators, collations, param_exprs,
 						best_path->singlerow, best_path->binary_mode,
-						best_path->est_entries, keyparamids);
+						best_path->est_entries, keyparamids, best_path->est_calls,
+						best_path->est_unique_keys, best_path->est_hit_ratio);
 
 	copy_generic_path_info(&plan->plan, (Path *) best_path);
 
@@ -6749,7 +6752,8 @@ materialize_finished_plan(Plan *subplan)
 static Memoize *
 make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
 			 List *param_exprs, bool singlerow, bool binary_mode,
-			 uint32 est_entries, Bitmapset *keyparamids)
+			 uint32 est_entries, Bitmapset *keyparamids, double est_calls,
+			 double est_unique_keys, double est_hit_ratio)
 {
 	Memoize    *node = makeNode(Memoize);
 	Plan	   *plan = &node->plan;
@@ -6767,6 +6771,9 @@ make_memoize(Plan *lefttree, Oid *hashoperators, Oid *collations,
 	node->binary_mode = binary_mode;
 	node->est_entries = est_entries;
 	node->keyparamids = keyparamids;
+	node->est_calls = est_calls;
+	node->est_unique_keys = est_unique_keys;
+	node->est_hit_ratio = est_hit_ratio;
 
 	return node;
 }
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 9cc602788ea..1812eb55821 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1689,7 +1689,7 @@ create_material_path(RelOptInfo *rel, Path *subpath)
 MemoizePath *
 create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 					List *param_exprs, List *hash_operators,
-					bool singlerow, bool binary_mode, double calls)
+					bool singlerow, bool binary_mode, double est_calls)
 {
 	MemoizePath *pathnode = makeNode(MemoizePath);
 
@@ -1710,7 +1710,6 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	pathnode->param_exprs = param_exprs;
 	pathnode->singlerow = singlerow;
 	pathnode->binary_mode = binary_mode;
-	pathnode->calls = clamp_row_est(calls);
 
 	/*
 	 * For now we set est_entries to 0.  cost_memoize_rescan() does all the
@@ -1720,6 +1719,12 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	 */
 	pathnode->est_entries = 0;
 
+	pathnode->est_calls = clamp_row_est(est_calls);
+
+	/* These will also be set later in cost_memoize_rescan() */
+	pathnode->est_unique_keys = 0;
+	pathnode->est_hit_ratio = 0;
+
 	/* we should not generate this path type when enable_memoize=false */
 	Assert(enable_memoize);
 	pathnode->path.disabled_nodes = subpath->disabled_nodes;
@@ -4259,7 +4264,7 @@ reparameterize_path(PlannerInfo *root, Path *path,
 													mpath->hash_operators,
 													mpath->singlerow,
 													mpath->binary_mode,
-													mpath->calls);
+													mpath->est_calls);
 			}
 		default:
 			break;
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 6567759595d..40166e22a49 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2131,10 +2131,12 @@ typedef struct MemoizePath
 								 * complete after caching the first record. */
 	bool		binary_mode;	/* true when cache key should be compared bit
 								 * by bit, false when using hash equality ops */
-	Cardinality calls;			/* expected number of rescans */
 	uint32		est_entries;	/* The maximum number of entries that the
 								 * planner expects will fit in the cache, or 0
 								 * if unknown */
+	Cardinality est_calls;		/* expected number of rescans */
+	Cardinality est_unique_keys;	/* estimated unique keys, for EXPLAIN */
+	double		est_hit_ratio;	/* estimated cache hit ratio, for EXPLAIN */
 } MemoizePath;
 
 /*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 4f59e30d62d..008328577cc 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1056,6 +1056,16 @@ typedef struct Memoize
 
 	/* paramids from param_exprs */
 	Bitmapset  *keyparamids;
+
+	/* Estimated number of rescans, for EXPLAIN */
+	Cardinality est_calls;
+
+	/* Estimated number of distinct lookup keys, for EXPLAIN */
+	Cardinality est_unique_keys;
+
+	/* Estimated cache hit ratio, for EXPLAIN */
+	double		est_hit_ratio;
+
 } Memoize;
 
 /* ----------------
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 60dcdb77e41..95f98b0ba40 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -90,7 +90,7 @@ extern MemoizePath *create_memoize_path(PlannerInfo *root,
 										List *hash_operators,
 										bool singlerow,
 										bool binary_mode,
-										double calls);
+										double est_calls);
 extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
 									  Path *subpath, SpecialJoinInfo *sjinfo);
 extern GatherPath *create_gather_path(PlannerInfo *root,
-- 
2.43.0

