On Sun, Mar 08, 2020 at 03:40:09PM -0400, Tom Lane wrote:
> Justin Pryzby <pry...@telsasoft.com> writes:
> > On Sun, Mar 08, 2020 at 02:37:49PM -0400, Tom Lane wrote:
> >> I guess we ought to change that function to use returns-a-tuplestore
> >> protocol instead of thinking it can hold a directory open across calls.
> > Thanks for the analysis.
> > Do you mean it should enumerate all files during the initial SRF call, or 
> > use
> > something other than the SRF_* macros ?
> It has to enumerate all the files during the first call.  I suppose it

> I've just finished scanning the source code and concluding that all
> of these functions are similarly broken:
> pg_ls_dir_files

I patched this one to see what it looks like and to allow /hopefully/ moving
forward one way or another with the pg_ls_tmpfile() patch set (or at least
avoid trying to do anything there which is too inconsistent with this fix).

> I don't see anything in the documentation (either funcapi.h or
> xfunc.sgml) warning that the function might not get run to completion,
> either ...

Also, at first glance, these seem to be passing constant "randomAccess=true"
rather than (bool) (rsinfo->allowedModes&SFRM_Materialize_Random)

$ git grep -wl SFRM_Materialize |xargs grep -l 'tuplestore_begin_heap(true'
contrib/dblink/dblink.c
contrib/pageinspect/brinfuncs.c
contrib/pg_stat_statements/pg_stat_statements.c
src/backend/access/transam/xlogfuncs.c
src/backend/commands/event_trigger.c
src/backend/commands/extension.c
src/backend/foreign/foreign.c
src/backend/replication/logical/launcher.c
src/backend/replication/logical/logicalfuncs.c
src/backend/replication/logical/origin.c
src/backend/replication/slotfuncs.c
src/backend/replication/walsender.c
src/backend/storage/ipc/shmem.c
src/backend/utils/adt/pgstatfuncs.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/pg_config.c

-- 
Justin
>From 1fd2918d65ed8cc64158e407e56ed61b44e951db Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Tue, 10 Mar 2020 21:01:47 -0500
Subject: [PATCH v1] pg_ls_dir_files: avoid leaking DIR if not run to
 completion

Change to return a tuplestore rather than leaving directory open across calls.

Discussion: https://www.postgresql.org/message-id/20200308173103.GC1357%40telsasoft.com
See also: 9cb7db3f0
---
 src/backend/utils/adt/genfile.c | 90 +++++++++++++++------------------
 1 file changed, 42 insertions(+), 48 deletions(-)

diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
index 5bda2af87c..046d218ffa 100644
--- a/src/backend/utils/adt/genfile.c
+++ b/src/backend/utils/adt/genfile.c
@@ -527,67 +527,63 @@ pg_ls_dir_1arg(PG_FUNCTION_ARGS)
 static Datum
 pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
 {
-	FuncCallContext *funcctx;
 	struct dirent *de;
-	directory_fctx *fctx;
-
-	if (SRF_IS_FIRSTCALL())
-	{
-		MemoryContext oldcontext;
-		TupleDesc	tupdesc;
+	MemoryContext oldcontext;
+	TupleDesc	tupdesc;
+	ReturnSetInfo	*rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	bool		randomAccess = (rsinfo->allowedModes&SFRM_Materialize_Random) != 0;
+	DIR			*dirdesc;
+	Tuplestorestate *tupstore;
 
-		funcctx = SRF_FIRSTCALL_INIT();
-		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that cannot accept a set")));
+	/* check to see if caller supports us returning a tuplestore */
+	if (!(rsinfo->allowedModes & SFRM_Materialize))
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("materialize mode required, but it is not "
+					 "allowed in this context")));
 
-		fctx = palloc(sizeof(directory_fctx));
+	/* The Tuplestore and TupleDesc should be created in ecxt_per_query_memory */
+	oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
 
-		tupdesc = CreateTemplateTupleDesc(3);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
-						   TEXTOID, -1, 0);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "size",
-						   INT8OID, -1, 0);
-		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "modification",
-						   TIMESTAMPTZOID, -1, 0);
-		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
 
-		fctx->location = pstrdup(dir);
-		fctx->dirdesc = AllocateDir(fctx->location);
+	tupstore = tuplestore_begin_heap(randomAccess, false, work_mem);
+	rsinfo->returnMode = SFRM_Materialize;
+	rsinfo->setResult = tupstore;
+	rsinfo->setDesc = tupdesc;
 
-		if (!fctx->dirdesc)
-		{
-			if (missing_ok && errno == ENOENT)
-			{
-				MemoryContextSwitchTo(oldcontext);
-				SRF_RETURN_DONE(funcctx);
-			}
-			else
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not open directory \"%s\": %m",
-								fctx->location)));
-		}
+	MemoryContextSwitchTo(oldcontext);
 
-		funcctx->user_fctx = fctx;
-		MemoryContextSwitchTo(oldcontext);
+	dirdesc = AllocateDir(dir);
+	if (!dirdesc)
+	{
+		if (missing_ok && errno == ENOENT)
+			return NULL; // XXX
+		else
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not open directory \"%s\": %m",
+							dir)));
 	}
 
-	funcctx = SRF_PERCALL_SETUP();
-	fctx = (directory_fctx *) funcctx->user_fctx;
-
-	while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
+	while ((de = ReadDir(dirdesc, dir)) != NULL)
 	{
 		Datum		values[3];
-		bool		nulls[3];
+		bool		nulls[3] = {0};
 		char		path[MAXPGPATH * 2];
 		struct stat attrib;
-		HeapTuple	tuple;
 
 		/* Skip hidden files */
 		if (de->d_name[0] == '.')
 			continue;
 
 		/* Get the file info */
-		snprintf(path, sizeof(path), "%s/%s", fctx->location, de->d_name);
+		snprintf(path, sizeof(path), "%s/%s", dir, de->d_name);
 		if (stat(path, &attrib) < 0)
 			ereport(ERROR,
 					(errcode_for_file_access(),
@@ -600,14 +596,12 @@ pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
 		values[0] = CStringGetTextDatum(de->d_name);
 		values[1] = Int64GetDatum((int64) attrib.st_size);
 		values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime));
-		memset(nulls, 0, sizeof(nulls));
-
-		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
-		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
 	}
 
-	FreeDir(fctx->dirdesc);
-	SRF_RETURN_DONE(funcctx);
+	tuplestore_donestoring(tupstore);
+	FreeDir(dirdesc);
+	return NULL;
 }
 
 /* Function to return the list of files in the log directory */
-- 
2.17.0

Reply via email to