[BUG] Cannot flush buffers of temp table deleted from other session

Daniil Davydov Wed, 11 Jun 2025 13:17:55 -0700

Hi,
Ability to do a DROP temporary table of other session leads to error :

session 1 :
create temp table test (id int);
-- let's assume that 'test' will be placed in schema 'pg_temp_0' and
has relfilepath 'base/5/t0_16390'
insert into test select generate_series(1, 1000); -- make few buffers dirty


session 2 :
drop table pg_temp_0.test; -- drop temp table of session 1

session 1:
create temp table test (id int); -- create another temp table
-- make dirty too many buffers (> temp_buffers), so postgres will try
to flush some of them
insert into test select generate_series(1, 250000);
ERROR:  could not open file "base/5/t0_16390": No such file or directory

Thus, we were trying to flush buffers of an already deleted temp
table. Error occurs here : GetLocalVictimBuffer -> FlushLocalBuffer ->
smgropen(BufTagGetRelFileLocator(&bufHdr->tag), MyProcNumber).
To be honest, I don't see a good way to handle this error, because
there may be a lot of reasons why flushing failed (up to the point
that the buffer tag may be corrupted).

I attach a patch (targeted on the master branch) that contains a
sample idea of error handling - don't throw an error if we can ensure
that the relation has been deleted.
Patch contains a very rough assumption, that relation's relfilenode ==
relation's oid.

What do you think about it?

BTW, there are other bugs that can occur during interacting with other
session temp tables. I described them and suggested corrections in
this thread [1].

[1] 
https://www.postgresql.org/message-id/flat/CAJDiXgj72Axj0d4ojKdRWG_rnkfs4uWY414NL%3D15sCvh7-9rwg%40mail.gmail.com

--
Best regards,
Daniil Davydov

From 7775fc7afbefceebf88d8be1cd94da939807035c Mon Sep 17 00:00:00 2001
From: Daniil Davidov <[email protected]>
Date: Thu, 12 Jun 2025 02:11:43 +0700
Subject: [PATCH] Handle error with flushing dirty buffer of deleted temp table

---
 src/backend/storage/buffer/localbuf.c | 90 +++++++++++++++++++++++++--
 1 file changed, 84 insertions(+), 6 deletions(-)

diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index ba26627f7b0..8b9da95c095 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -26,6 +26,7 @@
 #include "utils/memdebug.h"
 #include "utils/memutils.h"
 #include "utils/resowner.h"
+#include "utils/syscache.h"
 
 
 /*#define LBDEBUG*/
@@ -225,6 +226,7 @@ GetLocalVictimBuffer(void)
 	int			victim_bufid;
 	int			trycounter;
 	BufferDesc *bufHdr;
+	MemoryContext old_mcxt = CurrentMemoryContext;
 
 	ResourceOwnerEnlarge(CurrentResourceOwner);
 
@@ -281,12 +283,88 @@ GetLocalVictimBuffer(void)
 		LocalBufHdrGetBlock(bufHdr) = GetLocalBufferStorage();
 	}
 
-	/*
-	 * this buffer is not referenced but it might still be dirty. if that's
-	 * the case, write it out before reusing it!
-	 */
-	if (pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY)
-		FlushLocalBuffer(bufHdr, NULL);
+	PG_TRY();
+	{
+		/*
+		 * this buffer is not referenced but it might still be dirty. if that's
+		 * the case, write it out before reusing it!
+		 */
+		if (pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY)
+			FlushLocalBuffer(bufHdr, NULL);
+	}
+	PG_CATCH();
+	{
+		MemoryContext	error_mctx;
+		ErrorData		*error;
+		bool			can_handle_error = false;
+
+		/* Switch to the original context & copy edata */
+		error_mctx = MemoryContextSwitchTo(old_mcxt);
+		error = CopyErrorData();
+
+		/*
+		 * If we failed during file access, there can be two cases :
+		 * 1) Buffer belongs to relation deleted from other session.
+		 * 2) Any other error.
+		 *
+		 * In first case we still can continue without throwing error - just
+		 * don't try to flush this buffer.
+		 */
+		errcode_for_file_access();
+		if (error->sqlerrcode == ERRCODE_UNDEFINED_FILE)
+		{
+			HeapTuple	tuple;
+			Oid			estimated_oid;
+
+			/*
+			 * Buffer tag contains only relfilenode, that don't necessary
+			 * matches relation's oid. For lack of other information, just
+			 * assume that oid == relfilenumber.
+			 */
+			estimated_oid = BufTagGetRelFileLocator(&bufHdr->tag).relNumber;
+
+			/*
+			 * Try to find relcache entry for relation. If we fail - that means
+			 * that relation was deleted (e.g. first case).
+			 */
+			tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(estimated_oid));
+			if (!HeapTupleIsValid(tuple))
+			{
+				uint32			buf_state;
+				RelPathStr		path;
+				SMgrRelation	reln;
+
+				can_handle_error = true;
+
+				reln = smgropen(BufTagGetRelFileLocator(&bufHdr->tag),
+								MyProcNumber);
+
+				path = relpath(reln->smgr_rlocator,
+							   BufTagGetForkNum(&bufHdr->tag));
+
+				ereport(WARNING,
+						(errmsg("encountered dirty local buffer that belongs to"
+								"to deleted relation \"%s\"", path.str),
+						 errhint("if this relation had not been deleted from "
+								 "other session, then local buffers are corrupted")));
+
+				buf_state = pg_atomic_read_u32(&bufHdr->state);
+				buf_state &= ~BM_IO_ERROR;
+				buf_state &= ~BM_DIRTY;
+				pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
+			}
+		}
+
+		if (!can_handle_error)
+		{
+			MemoryContextSwitchTo(error_mctx);
+			PG_RE_THROW();
+		}
+
+		FlushErrorState();
+		FreeErrorData(error);
+	}
+	PG_END_TRY();
 
 	/*
 	 * Remove the victim buffer from the hashtable and mark as invalid.
-- 
2.43.0

[BUG] Cannot flush buffers of temp table deleted from other session

Reply via email to