From 321cdb85893b57e959f8633d9ffd866f8be2b3fd Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Mon, 20 Jun 2022 10:21:22 +0200
Subject: [PATCH v5] Add protections in xlog record APIs against large numbers
 and overflows.

Before this; it was possible for an extension to create malicious WAL records
that were too large to replay; or that would overflow the xl_tot_len field,
causing potential corruption in WAL record IO ops. Emitting invalid records
was also possible through pg_logical_emit_message(), which allowed you to emit
arbitrary amounts of data up to 2GB, much higher than the replay limit of 1GB.
---
 src/backend/access/transam/xloginsert.c | 59 +++++++++++++++++++++----
 src/backend/access/transam/xlogreader.c | 12 ++++-
 src/include/access/xlogreader.h         | 36 ++++++++++++++-
 src/include/access/xlogrecord.h         |  4 ++
 4 files changed, 101 insertions(+), 10 deletions(-)

diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 2ce9be2cc7..a8ac036cbd 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -141,7 +141,18 @@ static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info,
 									   bool *topxid_included);
 static bool XLogCompressBackupBlock(char *page, uint16 hole_offset,
 									uint16 hole_length, char *dest, uint16 *dlen);
+static inline void XLogErrorDataLimitExceeded();
 
+/*
+ * Error due to exceeding the maximum size of a WAL record, or registering
+ * more datas than are being accounted for by the XLog infrastructure.
+ */
+static inline void
+XLogErrorDataLimitExceeded()
+{
+	elog(ERROR, "too much WAL data");
+}
+ 
 /*
  * Begin constructing a WAL record. This must be called before the
  * XLogRegister* functions and XLogInsert().
@@ -352,10 +363,21 @@ XLogRegisterData(char *data, int len)
 {
 	XLogRecData *rdata;
 
-	Assert(begininsert_called);
+	Assert(begininsert_called && len >= 0 && XLogRecLengthIsValid(len));
+
+	/*
+	 * Check against max_rdatas; and ensure we don't fill a record with
+	 * more data than can be replayed. Records are allocated in one chunk
+	 * with some overhead, so ensure XLogRecLengthIsValid() for that size
+	 * of record.
+	 *
+	 * We also protect against overflows by first casting to uint64 - with only
+	 * uint32-operations this could still be an issue.
+	 */
+	if (unlikely(num_rdatas >= max_rdatas) ||
+		unlikely(!XLogRecLengthIsValid((uint64) mainrdata_len + (uint64) len)))
+		XLogErrorDataLimitExceeded();
 
-	if (num_rdatas >= max_rdatas)
-		elog(ERROR, "too much WAL data");
 	rdata = &rdatas[num_rdatas++];
 
 	rdata->data = data;
@@ -391,7 +413,7 @@ XLogRegisterBufData(uint8 block_id, char *data, int len)
 	registered_buffer *regbuf;
 	XLogRecData *rdata;
 
-	Assert(begininsert_called);
+	Assert(begininsert_called && len >= 0 && len <= UINT16_MAX);
 
 	/* find the registered buffer struct */
 	regbuf = &registered_buffers[block_id];
@@ -399,8 +421,16 @@ XLogRegisterBufData(uint8 block_id, char *data, int len)
 		elog(ERROR, "no block with id %d registered with WAL insertion",
 			 block_id);
 
-	if (num_rdatas >= max_rdatas)
-		elog(ERROR, "too much WAL data");
+	/*
+	 * Check against max_rdatas; and ensure we don't register more data per
+	 * buffer than can be handled by the physical data format; 
+	 * i.e. that regbuf->rdata_len does not grow beyond what
+	 * XLogRecordBlockHeader->data_length can hold.
+	 */
+	if (unlikely(num_rdatas >= max_rdatas) ||
+		unlikely(regbuf->rdata_len + len > UINT16_MAX))
+		XLogErrorDataLimitExceeded();
+
 	rdata = &rdatas[num_rdatas++];
 
 	rdata->data = data;
@@ -519,7 +549,7 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 				   XLogRecPtr *fpw_lsn, int *num_fpi, bool *topxid_included)
 {
 	XLogRecData *rdt;
-	uint32		total_len = 0;
+	uint64		total_len = 0;
 	int			block_id;
 	pg_crc32c	rdata_crc;
 	registered_buffer *prev_regbuf = NULL;
@@ -756,12 +786,18 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 
 		if (needs_data)
 		{
+			/*
+			 * When copying to XLogRecordBlockHeader, the length is narrowed
+			 * to an uint16. We double-check that that is still correct.
+			 */
+			Assert(regbuf->rdata_len <= UINT16_MAX);
+
 			/*
 			 * Link the caller-supplied rdata chain for this buffer to the
 			 * overall list.
 			 */
 			bkpb.fork_flags |= BKPBLOCK_HAS_DATA;
-			bkpb.data_length = regbuf->rdata_len;
+			bkpb.data_length = (uint16) regbuf->rdata_len;
 			total_len += regbuf->rdata_len;
 
 			rdt_datas_last->next = regbuf->rdata_head;
@@ -858,6 +894,13 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 	for (rdt = hdr_rdt.next; rdt != NULL; rdt = rdt->next)
 		COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
 
+	/*
+	 * Ensure that xlogreader.c can read the record by ensuring that the
+	 * data section of the WAL record (plus infra overhead) can be allocated.
+	 */
+	if (unlikely(!XLogRecLengthIsValid(total_len)))
+		XLogErrorDataLimitExceeded();
+
 	/*
 	 * Fill in the fields in the record header. Prev-link is filled in later,
 	 * once we know where in the WAL the record will be inserted. The CRC does
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index cf5db23cb8..63a4ff0a0d 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -669,7 +669,17 @@ restart:
 			report_invalid_record(state,
 								  "invalid record length at %X/%X: wanted %u, got %u",
 								  LSN_FORMAT_ARGS(RecPtr),
-								  (uint32) SizeOfXLogRecord, total_len);
+								  (uint32) SizeOfXLogRecord,
+								  total_len);
+			goto err;
+		}
+		if (!XLogRecLengthIsValid(total_len))
+		{
+			report_invalid_record(state,
+								  "invalid record length at %X/%X: wanted %u, got %u",
+								  LSN_FORMAT_ARGS(RecPtr),
+								  (uint32) XLogRecMaxLength,
+								  total_len);
 			goto err;
 		}
 		gotheader = false;
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index e73ea4a840..b2939b13c5 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -36,6 +36,7 @@
 
 #ifndef FRONTEND
 #include "access/transam.h"
+#include "utils/memutils.h"
 #endif
 
 #include "access/xlogrecord.h"
@@ -425,7 +426,40 @@ extern bool DecodeXLogRecord(XLogReaderState *state,
 
 #ifndef FRONTEND
 extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record);
-#endif
+#else
+#define MaxAllocSize (Size) 0x3FFFFFFF
+#endif /* ifndef FRONTEND */
+
+/*
+ * XLogRecMaxSize - the maximum WAL record size that the xlogreader
+ * infrastructure can handle.
+ *
+ * Defined by MaxAllocSize and the various overheads used in the
+ * DecodedXLogRecord infrastructure.
+ *
+ * see DecodeXLogRecordRequiredSpace and allocate_recordbuf for places where
+ * allocations of full records + overhead are done.
+ */
+#define XLogRecMaxLength \
+( \
+	Min( \
+		(MaxAllocSize - ( \
+			offsetof(DecodedXLogRecord, blocks[0]) + \
+			(sizeof(DecodedBkpBlock) * (XLR_MAX_BLOCK_ID + 1)) + \
+			(MAXIMUM_ALIGNOF - 1) + \
+			(MAXIMUM_ALIGNOF - 1) + \
+			(MAXIMUM_ALIGNOF - 1) \
+		)), \
+		MaxAllocSize - (MaxAllocSize % XLOG_BLCKSZ) \
+	) \
+)
+
+/*
+ * XLogRecLengthIsValid
+ *
+ * Check that this length does not exceed the maximum xlog record length
+ */
+#define XLogRecLengthIsValid(xlr_size) ((xlr_size) <= XLogRecMaxLength)
 
 extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page);
 extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
index 052ac6817a..3575363e84 100644
--- a/src/include/access/xlogrecord.h
+++ b/src/include/access/xlogrecord.h
@@ -37,6 +37,10 @@
  * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
  * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
  * used to distinguish between block references, and the main data structs.
+ *
+ * Note that xlogreader.c limits the total size of the xl_tot_len to
+ * MaxAllocSize (1GB - 1). This means that this is also the maximum size
+ * of a single WAL record - we would be unable to replay any larger record.
  */
 typedef struct XLogRecord
 {
-- 
2.30.2

