Hi,

On 2023-04-24 15:32:25 -0700, Andres Freund wrote:
> On 2023-04-24 10:53:35 +0200, Christoph Berg wrote:
> > I'm often seeing PG16 builds erroring out in the pgbench tests:
> > I don't think the disk is full since it's always hitting that same
> > spot, on some of the builds:
> 
> Yea, the EINTR pretty clearly indicates that it's not really out-of-space.

FWIW, I tried to reproduce this, without success - not too surprising, I
assume it's rather timing dependent.


> We obviously can add a retry loop to FileFallocate(), similar to what's
> already present e.g. in FileRead(). But I wonder if we shouldn't go a bit
> further, and do it for all the fd.c routines where it's remotely plausible
> EINTR could be returned? It's a bit silly to add EINTR retries one-by-one to
> the functions.
> 
> 
> The following are documented to potentially return EINTR, without fd.c having
> code to retry:
> 
> - FileWriteback() / pg_flush_data()
> - FileSync() / pg_fsync()
> - FileFallocate()
> - FileTruncate()
> 
> With the first two there's the added complication that it's not entirely
> obvious whether it'd be better to handle this in File* or pg_*. I'd argue the
> latter is a bit more sensible?

A prototype of that approach is attached. I pushed the retry handling into the
pg_* routines where applicable.  I guess we could add pg_* routines for
FileFallocate(), FilePrewarm() etc as well, but I didn't do that here.

Christoph, could you verify this fixes your issue?

Greetings,

Andres Freund
>From d8f5b0d4765044a0f35d01054c9d2720c6045b4c Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Mon, 24 Apr 2023 16:53:52 -0700
Subject: [PATCH v1] fd.c: Retry after EINTR in more places

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
Backpatch:
---
 src/backend/storage/file/fd.c | 71 +++++++++++++++++++++++++++++------
 1 file changed, 59 insertions(+), 12 deletions(-)

diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 277a28fc137..2b232a80489 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -415,10 +415,18 @@ pg_fsync(int fd)
 int
 pg_fsync_no_writethrough(int fd)
 {
-	if (enableFsync)
-		return fsync(fd);
-	else
+	int		rc;
+
+	if (!enableFsync)
 		return 0;
+
+retry:
+	rc = fsync(fd);
+
+	if (rc == -1 && errno == EINTR)
+		goto retry;
+
+	return rc;
 }
 
 /*
@@ -448,10 +456,18 @@ pg_fsync_writethrough(int fd)
 int
 pg_fdatasync(int fd)
 {
-	if (enableFsync)
-		return fdatasync(fd);
-	else
+	int		rc;
+
+	if (!enableFsync)
 		return 0;
+
+retry:
+	rc = fdatasync(fd);
+
+	if (rc == -1 && errno == EINTR)
+		goto retry;
+
+	return rc;
 }
 
 /*
@@ -483,6 +499,7 @@ pg_flush_data(int fd, off_t offset, off_t nbytes)
 		if (not_implemented_by_kernel)
 			return;
 
+retry:
 		/*
 		 * sync_file_range(SYNC_FILE_RANGE_WRITE), currently linux specific,
 		 * tells the OS that writeback for the specified blocks should be
@@ -498,6 +515,9 @@ pg_flush_data(int fd, off_t offset, off_t nbytes)
 		{
 			int			elevel;
 
+			if (rc == EINTR)
+				goto retry;
+
 			/*
 			 * For systems that don't have an implementation of
 			 * sync_file_range() such as Windows WSL, generate only one
@@ -629,32 +649,51 @@ pg_flush_data(int fd, off_t offset, off_t nbytes)
 #endif
 }
 
+static int
+pg_ftruncate(int fd, off_t length)
+{
+	int			ret;
+
+retry:
+	ret = ftruncate(fd, length);
+
+	if (ret == -1 && errno == EINTR)
+		goto retry;
+
+	return ret;
+}
+
 /*
  * Truncate a file to a given length by name.
  */
 int
 pg_truncate(const char *path, off_t length)
 {
+	int			ret;
 #ifdef WIN32
 	int			save_errno;
-	int			ret;
 	int			fd;
 
 	fd = OpenTransientFile(path, O_RDWR | PG_BINARY);
 	if (fd >= 0)
 	{
-		ret = ftruncate(fd, length);
+		ret = pg_ftruncate(fd, length);
 		save_errno = errno;
 		CloseTransientFile(fd);
 		errno = save_errno;
 	}
 	else
 		ret = -1;
+#else
+
+retry:
+	ret = truncate(path, length);
+
+	if (ret == -1 && errno == EINTR)
+		goto retry;
+#endif
 
 	return ret;
-#else
-	return truncate(path, length);
-#endif
 }
 
 /*
@@ -2001,11 +2040,15 @@ FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
 	if (returnCode < 0)
 		return returnCode;
 
+retry:
 	pgstat_report_wait_start(wait_event_info);
 	returnCode = posix_fadvise(VfdCache[file].fd, offset, amount,
 							   POSIX_FADV_WILLNEED);
 	pgstat_report_wait_end();
 
+	if (returnCode == EINTR)
+		goto retry;
+
 	return returnCode;
 #else
 	Assert(FileIsValid(file));
@@ -2281,12 +2324,16 @@ FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info)
 	if (returnCode < 0)
 		return -1;
 
+retry:
+
 	pgstat_report_wait_start(wait_event_info);
 	returnCode = posix_fallocate(VfdCache[file].fd, offset, amount);
 	pgstat_report_wait_end();
 
 	if (returnCode == 0)
 		return 0;
+	else if (returnCode == EINTR)
+		goto retry;
 
 	/* for compatibility with %m printing etc */
 	errno = returnCode;
@@ -2334,7 +2381,7 @@ FileTruncate(File file, off_t offset, uint32 wait_event_info)
 		return returnCode;
 
 	pgstat_report_wait_start(wait_event_info);
-	returnCode = ftruncate(VfdCache[file].fd, offset);
+	returnCode = pg_ftruncate(VfdCache[file].fd, offset);
 	pgstat_report_wait_end();
 
 	if (returnCode == 0 && VfdCache[file].fileSize > offset)
-- 
2.38.0

Reply via email to