From 559fdd043c9a4de2b8cda3a779a39ccd4af97374 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Mon, 11 Dec 2023 15:02:58 +1300
Subject: [PATCH v4 1/8] Provide helper routine for partial vector I/O retry.

compute_remaining_iovec() is a reusable routine for cases where
pg_preadv()/pg_pwritev() reports a short transfer.  This will gain a new
user in a later patch, but can already replace the open-coded equivalent
code in the existing pg_pwritev_with_retry() function.

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Discussion: https://postgr.es/m/CA+hUKGJkOiOCa+mag4BF+zHo7qo=o9CFheB8=g6uT5TUm2gkvA@mail.gmail.com
---
 src/common/file_utils.c         | 80 ++++++++++++++++++++++-----------
 src/include/common/file_utils.h |  5 +++
 2 files changed, 60 insertions(+), 25 deletions(-)

diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index abe5129412..54ef9c9561 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -581,6 +581,55 @@ get_dirent_type(const char *path,
 	return result;
 }
 
+/*
+ * Compute what remains to be transferred after a possibly partial vectored
+ * read or write.  The part of 'source' (an array of 'iovcnt' > 0 elements)
+ * that lies beyond the first 'transferred' bytes is written to 'destination',
+ * and the number of iovecs written is returned.  'source' and 'destination'
+ * may be the same array, in which case it is adjusted in place; otherwise
+ * 'destination' must have space for 'iovcnt' elements.  Returns 0 for a fully
+ * completed transfer (leaving 'destination' untouched), though some callers
+ * may already know the total size and be able to skip the call in that case.
+ */
+int
+compute_remaining_iovec(struct iovec *destination,
+						const struct iovec *source,
+						int iovcnt,
+						size_t transferred)
+{
+	Assert(iovcnt > 0);
+
+	/* Skip wholly transferred iovecs (including any zero-length ones). */
+	while (source->iov_len <= transferred)
+	{
+		transferred -= source->iov_len;
+		source++;
+		iovcnt--;
+
+		/* Ran out of iovecs: everything was transferred. */
+		if (iovcnt == 0)
+		{
+			/*
+			 * Any leftover count here would mean the kernel reported more
+			 * bytes than the iovecs describe, i.e. something is out of sync.
+			 */
+			Assert(transferred == 0);
+			return 0;
+		}
+	}
+
+	/* Move the remaining iovecs to the front; the arrays may overlap. */
+	if (source != destination)
+		memmove(destination, source, sizeof(*source) * iovcnt);
+
+	/* Advance the leading iovec past any bytes already transferred. */
+	Assert(destination->iov_len > transferred);
+	destination->iov_base = (char *) destination->iov_base + transferred;
+	destination->iov_len -= transferred;
+
+	return iovcnt;
+}
+
 /*
  * pg_pwritev_with_retry
  *
@@ -601,7 +650,7 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
 		return -1;
 	}
 
-	for (;;)
+	do
 	{
 		/* Write as much as we can. */
 		part = pg_pwritev(fd, iov, iovcnt, offset);
@@ -616,33 +665,14 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
 		sum += part;
 		offset += part;
 
-		/* Step over iovecs that are done. */
-		while (iovcnt > 0 && iov->iov_len <= part)
-		{
-			part -= iov->iov_len;
-			++iov;
-			--iovcnt;
-		}
-
-		/* Are they all done? */
-		if (iovcnt == 0)
-		{
-			/* We don't expect the kernel to write more than requested. */
-			Assert(part == 0);
-			break;
-		}
-
 		/*
-		 * Move whatever's left to the front of our mutable copy and adjust
-		 * the leading iovec.
+		 * See what is left.  On the first loop we used the caller's array,
+		 * but on later loops we'll use our local copy that we are allowed to
+		 * mutate.
 		 */
-		Assert(iovcnt > 0);
-		memmove(iov_copy, iov, sizeof(*iov) * iovcnt);
-		Assert(iov->iov_len > part);
-		iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part;
-		iov_copy[0].iov_len -= part;
+		iovcnt = compute_remaining_iovec(iov_copy, iov, iovcnt, part);
 		iov = iov_copy;
-	}
+	} while (iovcnt > 0);
 
 	return sum;
 }
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index 3bb20170cb..02a940e310 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -46,6 +46,11 @@ extern PGFileType get_dirent_type(const char *path,
 								  bool look_through_symlinks,
 								  int elevel);
 
+extern int	compute_remaining_iovec(struct iovec *destination,
+									const struct iovec *source,
+									int iovcnt,
+									size_t transferred);
+
 extern ssize_t pg_pwritev_with_retry(int fd,
 									 const struct iovec *iov,
 									 int iovcnt,
-- 
2.39.3 (Apple Git-145)

