Hi Tom, Hi Matthias,

you are right of course. I have looked at the code from printtup.c and
made a new version of the patch.

Thanks for the MemoryContextReset hint too (@Matthias)

This time is called  EXPLAIN(ANALYZE,SERIALIZE) (hey, it also sounds
nicer phonetically)

If the option SERIALIZE is active, the output functions are called and
they perform the detoasting, which I have even checked.

So things are better this way, however I hardcoded the output option
"Text" (format=0). In printtup.c there is an incoming array which
applies Text (format=0) or Binary (format=1) for each column
individually. I am not sure whether this is even needed. I left in the
if-statement from printtup.c which calls the binary output method of a
given type. The result of the output is ignored and apparently free'd
because of the memory-context-reset at the end.

Please also note, that I added a call to DestReceiver's rDestroy hook,
which was missing from explain.c before altogether.

Feedback is appreciated.

/Stepan


On 12.09.23 17:26, Tom Lane wrote:
Matthias van de Meent <boekewurm+postg...@gmail.com> writes:
Hmm, maybe we should measure the overhead of serializing the tuples instead.
The difference between your patch and "serializing the tuples, but not
sending them" is that serializing also does the detoasting, but also
includes any time spent in the serialization functions of the type. So
an option "SERIALIZE" which measures all the time the server spent on
the query (except the final step of sending the bytes to the client)
would likely be more useful than "just" detoasting.
+1, that was my immediate reaction to the proposal as well.  Some
output functions are far from cheap.  Doing only the detoast part
seems like it's still misleading.

Do we need to go as far as offering both text-output and binary-output
options?

                        regards, tom lane
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 8570b14f62..dc52019a3e 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -47,7 +47,6 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL;
 /* Hook for plugins to get control in explain_get_index_name() */
 explain_get_index_name_hook_type explain_get_index_name_hook = NULL;
 
-
 /* OR-able flags for ExplainXMLTag() */
 #define X_OPENING 0
 #define X_CLOSING 1
@@ -155,6 +154,8 @@ static void ExplainYAMLLineStarting(ExplainState *es);
 static void escape_yaml(StringInfo buf, const char *str);
 
 
+static DestReceiver *CreateSerializeDestReceiver(void);
+
 
 /*
  * ExplainQuery -
@@ -192,6 +193,8 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 			es->settings = defGetBoolean(opt);
 		else if (strcmp(opt->defname, "generic_plan") == 0)
 			es->generic = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "serialize") == 0)
+			es->serialize = defGetBoolean(opt);
 		else if (strcmp(opt->defname, "timing") == 0)
 		{
 			timing_set = true;
@@ -244,6 +247,12 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("EXPLAIN option TIMING requires ANALYZE")));
 
+	/* check that serialize is used with EXPLAIN ANALYZE */
+	if (es->serialize && !es->analyze)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("EXPLAIN option SERIALIZE requires ANALYZE")));
+
 	/* check that GENERIC_PLAN is not used with EXPLAIN ANALYZE */
 	if (es->generic && es->analyze)
 		ereport(ERROR,
@@ -565,9 +574,16 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	/*
 	 * Normally we discard the query's output, but if explaining CREATE TABLE
 	 * AS, we'd better use the appropriate tuple receiver.
+	 * Also if we choose to serialize during explain, we need to send the
+	 * tuples to a receiver that performs the serializing
+	 * which includes detoasting and the conversion of the attributes
+	 * into the protocol receiving format. The latter can also be costly
+	 * for some types
 	 */
 	if (into)
 		dest = CreateIntoRelDestReceiver(into);
+	else if (es->analyze && es->serialize)
+		dest = CreateSerializeDestReceiver();
 	else
 		dest = None_Receiver;
 
@@ -606,6 +622,9 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 		/* run cleanup too */
 		ExecutorFinish(queryDesc);
 
+		/* call the dest receiver's destroy method even during explain */
+		dest->rDestroy(dest);
+
 		/* We can't run ExecutorEnd 'till we're done printing the stats... */
 		totaltime += elapsed_time(&starttime);
 	}
@@ -5052,3 +5071,193 @@ escape_yaml(StringInfo buf, const char *str)
 {
 	escape_json(buf, str);
 }
+
+
+/* Serializing DestReceiver functions
+ *
+ * the idea is that running  EXPLAIN (ANALYZE)  sometimes gives results that
+ * are way unrealistic, since during explain there is no detoasting of
+ * values. That means you get no idea what your actual query time is going
+ * to be, nor what the actual overhead of detoasting is.
+ * 
+ * Also the output/send functions of some types may add additional overhead
+ * and need to be invoked for a realistic impression of the runtime of explain
+ *
+ * if you run EXPLAIN (ANAYLZE, SERIALIZE) then you get an impression of
+ * what is actually happening when you use postgresql as a document-database
+ * or have large values for some other reason.
+ */
+
+/* an attribute info cached for each column */
+typedef struct
+{								/* Per-attribute information */
+	Oid			typoutput;		/* Oid for the type's text output fn */
+	Oid			typsend;		/* Oid for the type's binary output fn */
+	bool		typisvarlena;	/* is it varlena (ie possibly toastable)? */
+	int16		format;			/* format code for this column */
+	FmgrInfo	finfo;			/* Precomputed call info for output fn */
+} SerializeAttrInfo;
+
+typedef struct
+{
+	/* receiver for the tuples, that just serializes */
+	DestReceiver		destRecevier;
+	MemoryContext		memoryContext;
+	MemoryContext		oldMemoryContext;	/* restore to old when done */
+	TupleDesc			attrinfo;
+	int					nattrs;
+	SerializeAttrInfo	*infos;				/* Cached info about each attr */
+} SerializeDestReceiver;
+
+/*
+ * Get the lookup info that the row-callback of the receiver needs. this code
+ * is similar to the code from printup.c except that it doesn't do any actual
+ * output.
+ */
+static void
+serialize_prepare_info(SerializeDestReceiver *receiver, TupleDesc typeinfo,
+	int nattrs)
+{
+	int i;
+
+	/* get rid of any old data */
+	if (receiver->infos)
+		pfree(receiver->infos);
+	receiver->infos = NULL;
+
+	receiver->attrinfo = typeinfo;
+	receiver->nattrs = nattrs;
+	if (nattrs <= 0)
+		return;
+
+	receiver->infos = (SerializeAttrInfo *) palloc0(
+		nattrs * sizeof(SerializeAttrInfo));
+	for (i = 0; i < nattrs; i++)
+	{
+		SerializeAttrInfo *info = &(receiver->infos[i]);
+		/* assume text output for all attributes */
+		info->format = 0;
+	}
+
+	for (i = 0; i < nattrs; i++)
+	{
+		SerializeAttrInfo *info = &(receiver->infos[i]);
+		Form_pg_attribute attr = TupleDescAttr(typeinfo, i);
+		if (info->format == 0)
+		{
+			getTypeOutputInfo(attr->atttypid,
+							  &info->typoutput,
+							  &info->typisvarlena);
+			fmgr_info(info->typoutput, &info->finfo);
+		}
+		else if (info->format == 1)
+		{
+			getTypeBinaryOutputInfo(attr->atttypid,
+									&info->typsend,
+									&info->typisvarlena);
+			fmgr_info(info->typsend, &info->finfo);
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unsupported format code: %d", info->format)));
+	}
+}
+
+
+
+/* this method receives the tuples/records during explain (analyze, serialize)
+ * and serializes them.
+ * this method is otherwise not called and its purpose is to serialize to
+ * make analyze optionally more realistic. serialization includes detoasting
+ * and invoking the type's output (or send) function, which is what happens
+ * when you actually run the query. both steps can add overhead and thus
+ * analyze can be very misleading if those two steps cannot optionally be
+ * performed.
+ */
+static bool
+serializeAnalyzeReceive(TupleTableSlot *slot, DestReceiver *self)
+{
+	int i;
+	TupleDesc tupdesc;
+	SerializeDestReceiver *receiver = (SerializeDestReceiver*) self;
+
+	tupdesc  = slot->tts_tupleDescriptor;
+	/* Cache attribute infos and function oid if outdated */
+	if (receiver->attrinfo != tupdesc || receiver->nattrs != tupdesc->natts)
+		serialize_prepare_info(receiver, tupdesc, tupdesc->natts);
+
+	/* Fill all of the slot's attributes, we can now use slot->tts_values
+	 * and its tts_isnull array which should be long enough even if added
+	 * a null-column to the table */
+	slot_getallattrs(slot);
+
+	receiver->oldMemoryContext = MemoryContextSwitchTo(
+		receiver->memoryContext);
+
+	/* Iterate over all attributes of the tuple and invoke the output func
+	 * (or send function in case of a binary format). completely ignore the
+	 * result. MemoryContext is free'd via reset at the end of the this
+	 * per-tuple callback anyhow.
+	 * NOTE: when invoking the output function detoasting is implicitly
+	 * performed when needed. This adds major overhead to plain 
+	 * explain-analyze and this is what is desired here.
+	 */
+	for (i = 0; i < tupdesc->natts; i++)
+	{
+		SerializeAttrInfo *info = &(receiver->infos[i]);
+		Datum attr = slot->tts_values[i];
+		if (!slot->tts_isnull[i]) {
+			if (info->format == 0)
+			{
+				/* Text output, just ignore the result */
+				(void) OutputFunctionCall(&info->finfo, attr);
+			}
+			else
+			{
+				/* Binary output, just ignore the result */
+				(void) SendFunctionCall(&info->finfo, attr);
+			}
+		}
+	}
+	MemoryContextSwitchTo(receiver->oldMemoryContext);
+	MemoryContextReset(receiver->memoryContext);
+	return true;
+}
+
+static void
+serializeAnalyzeStartup(DestReceiver *self, int operation, TupleDesc typeinfo)
+{
+	SerializeDestReceiver *receiver = (SerializeDestReceiver*) self;
+	/* memory context for our work */
+	receiver->memoryContext = AllocSetContextCreate(CurrentMemoryContext,
+		"SerializeTupleReceive", ALLOCSET_DEFAULT_SIZES);
+}
+
+static void
+serializeAnalyzeCleanup(DestReceiver *self)
+{
+	SerializeDestReceiver *receiver = (SerializeDestReceiver*) self;
+	MemoryContextDelete(receiver->memoryContext);
+}
+
+static void
+serializeAnalyzeDestroy(DestReceiver *self)
+{
+	pfree(self);
+}
+
+static DestReceiver *CreateSerializeDestReceiver(void)
+{
+	/* Attention must palloc0 (like calloc) here, since some fields like
+	 * infos and nattrs are expected to be 0 or NULL initially */
+	SerializeDestReceiver *self = (SerializeDestReceiver*) palloc0(
+		sizeof(SerializeDestReceiver));
+
+	self->destRecevier.receiveSlot = serializeAnalyzeReceive;
+	self->destRecevier.rStartup = serializeAnalyzeStartup;
+	self->destRecevier.rShutdown = serializeAnalyzeCleanup;
+	self->destRecevier.rDestroy = serializeAnalyzeDestroy;
+	self->destRecevier.mydest = DestNone;
+	return (DestReceiver *) self;
+}
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index 3d3e632a0c..5321a7f0a1 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -47,6 +47,14 @@ typedef struct ExplainState
 	bool		summary;		/* print total planning and execution timing */
 	bool		settings;		/* print modified settings */
 	bool		generic;		/* generate a generic plan */
+	bool		serialize;      /* serialize all values, only applicable
+								 * when "analyze" is active. serializing
+								 * invokes the type's output function in
+								 * the same way printtup.c does then tuples
+								 * are send to the client. this includes
+								 * detoasting of toasted values and makes
+								 * EXPLAIN(ANALYZE,SERIALIZE) return realistic
+								 * (longer runtimes */
 	ExplainFormat format;		/* output format */
 	/* state for output formatting --- not reset for each new plan tree */
 	int			indent;			/* current indentation level */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 1aca77491b..f7a09f3932 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -559,3 +559,23 @@ select explain_filter('explain (verbose) select * from int8_tbl i8');
  Query Identifier: N
 (3 rows)
 
+-- Test that SERIALIZABLE is accepted as a parameter to explain
+-- seems hard to test the expected longer runtime of serialize
+-- since one needs to setup enough data to make such a test stable
+-- also timing are filtered out by explain_filter anyway
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (analyze,serialize) select * from test_serialize');
+                                          explain_filter                                          
+--------------------------------------------------------------------------------------------------
+ Seq Scan on test_serialize  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(3 rows)
+
+drop table test_serialize;
+-- Test SERIALIZE is _not_ accepted as a parameter to explain unless ANALYZE is specified
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (serialize) select * from test_serialize');
+ERROR:  EXPLAIN option SERIALIZE requires ANALYZE
+CONTEXT:  PL/pgSQL function explain_filter(text) line 5 at FOR over EXECUTE statement
+drop table test_serialize;
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index b6b7beab27..823770d546 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -154,3 +154,18 @@ select explain_filter('explain (verbose) select * from t1 where pg_temp.mysin(f1
 -- Test compute_query_id
 set compute_query_id = on;
 select explain_filter('explain (verbose) select * from int8_tbl i8');
+
+-- Test that SERIALIZABLE is accepted as a parameter to explain
+-- seems hard to test the expected longer runtime of serialize
+-- since one needs to setup enough data to make such a test stable
+-- also timing are filtered out by explain_filter anyway
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (analyze,serialize) select * from test_serialize');
+drop table test_serialize;
+
+-- Test SERIALIZE is _not_ accepted as a parameter to explain unless ANALYZE is specified
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (serialize) select * from test_serialize');
+drop table test_serialize;
+
+

Reply via email to