Hi,

Xlogreader limits the size of what it considers valid xlog records to
MaxAllocSize; but this is not currently enforced in the
XLogRecAssemble API. This means it is possible to assemble a record
that postgresql cannot replay.
Similarly; it is possible to repeatedly call XlogRegisterData() so as
to overflow rec->xl_tot_len; resulting in out-of-bounds reads and
writes while processing record data;

PFA a patch that attempts to fix both of these issues in the insertion
API; by checking against overflows and other incorrectly large values
in the relevant functions in xloginsert.c. In this patch, I've also
added a comment to the XLogRecord spec to document that xl_tot_len
should not be larger than 1GB - 1B; and why that limit exists.

Kind regards,

Matthias van de Meent
From a72121b8fccbbf8c289e71a2c76801a1004f5353 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Fri, 11 Mar 2022 16:16:22 +0100
Subject: [PATCH v1] Add protections in xlog record APIs against large numbers
 and overflows.

Before this; it was possible for an extension to create malicious WAL records
that were too large to replay; or that would overflow the xl_tot_len field,
causing potential corruption in WAL record IO ops.
---
 src/backend/access/transam/xloginsert.c | 23 ++++++++++++++++++++++-
 src/include/access/xlogrecord.h         |  4 ++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index c260310c4c..ae654177de 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -342,6 +342,11 @@ XLogRegisterData(char *data, int len)
 
 	if (num_rdatas >= max_rdatas)
 		elog(ERROR, "too much WAL data");
+
+	/* protect against overflow */
+	if (unlikely((uint64) mainrdata_len + (uint64) len > UINT32_MAX))
+		elog(ERROR, "too much WAL data");
+
 	rdata = &rdatas[num_rdatas++];
 
 	rdata->data = data;
@@ -387,6 +392,11 @@ XLogRegisterBufData(uint8 block_id, char *data, int len)
 
 	if (num_rdatas >= max_rdatas)
 		elog(ERROR, "too much WAL data");
+
+	/* protect against overflow */
+	if (unlikely((uint64) regbuf->rdata_len + (uint64) len > UINT32_MAX))
+		elog(ERROR, "too much WAL data");
+
 	rdata = &rdatas[num_rdatas++];
 
 	rdata->data = data;
@@ -505,7 +515,7 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 				   XLogRecPtr *fpw_lsn, int *num_fpi, bool *topxid_included)
 {
 	XLogRecData *rdt;
-	uint32		total_len = 0;
+	uint64		total_len = 0;
 	int			block_id;
 	pg_crc32c	rdata_crc;
 	registered_buffer *prev_regbuf = NULL;
@@ -734,6 +744,10 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 
 		if (needs_data)
 		{
+			/* protect against overflow */
+			if (unlikely(regbuf->rdata_len > UINT16_MAX))
+				elog(ERROR, "too much WAL data for registered buffer");
+
 			/*
 			 * Link the caller-supplied rdata chain for this buffer to the
 			 * overall list.
@@ -836,6 +850,13 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 	for (rdt = hdr_rdt.next; rdt != NULL; rdt = rdt->next)
 		COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
 
+	/*
+	 * Ensure that xlogreader.c can read the record; and check that we don't
+	 * accidentally overflow the size of the record.
+	 * */
+	if (unlikely(!AllocSizeIsValid(total_len) || total_len > UINT32_MAX))
+		elog(ERROR, "too much registered data for WAL record");
+
 	/*
 	 * Fill in the fields in the record header. Prev-link is filled in later,
 	 * once we know where in the WAL the record will be inserted. The CRC does
diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
index c1b1137aa7..950e1f22b0 100644
--- a/src/include/access/xlogrecord.h
+++ b/src/include/access/xlogrecord.h
@@ -37,6 +37,10 @@
  * The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
  * XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
  * used to distinguish between block references, and the main data structs.
+ *
+ * Note that xlogreader.c limits the total size of the xl_tot_len to
+ * MaxAllocSize (1GB - 1). This means that this is also the maximum size
+ * of a single WAL record - we would be unable to replay any larger record.
  */
 typedef struct XLogRecord
 {
-- 
2.30.2

Reply via email to