On Mon, 14 Mar 2022 at 18:14, Andres Freund <[email protected]> wrote:
>
> Hi
>
> A random thought I had while thinking about the size limits: We could use the
> low bits of the length and xl_prev to store XLR_SPECIAL_REL_UPDATE |
> XLR_CHECK_CONSISTENCY and give rmgrs the full 8 bit of xl_info. Which would
> allow us to e.g. get away from needing Heap2. Which would aestethically be
> pleasing.
That would be interesting; though out of scope for this bug I'm trying to fix.
> On 2022-03-14 17:57:23 +0100, Matthias van de Meent wrote:
> > I'm not sure whether or not to include this in the test suite, though,
> > as this would require a machine with at least 1GB of memory available
> > for this test alone, and I don't know the current requirements for
> > running the test suite.
>
> We definitely shouldn't require this much RAM for the tests.
>
> It might be worth adding tests exercising edge cases around segment boundaries
> (and perhaps page boundaries) though. E.g. record headers split across pages
> and segments.
>
>
>
> > --- a/src/backend/access/transam/xloginsert.c
> > +++ b/src/backend/access/transam/xloginsert.c
> > @@ -338,10 +338,16 @@ XLogRegisterData(char *data, int len)
> > {
> > XLogRecData *rdata;
> >
> > - Assert(begininsert_called);
> > + Assert(begininsert_called && len >= 0 && AllocSizeIsValid(len));
>
> Shouldn't we just make the length argument unsigned?
I've applied that in the attached revision; but I'd like to note that
this makes the fix less straightforward to backpatch; as the changes
to the public function signatures shouldn't be applied in older
versions.
> > - if (num_rdatas >= max_rdatas)
> > + /*
> > + * Check against max_rdatas; and ensure we don't fill a record with
> > + * more data than can be replayed
> > + */
> > + if (unlikely(num_rdatas >= max_rdatas ||
> > + !AllocSizeIsValid((uint64) mainrdata_len +
> > (uint64) len)))
> > elog(ERROR, "too much WAL data");
> > +
> > rdata = &rdatas[num_rdatas++];
>
> Personally I'd write it as unlikely(num_rdatas >= max_rdatas) || unlikely(...)
> but I doubt if it makes an actual difference to the compiler.
Agreed, updated.
> > rdata->data = data;
> > @@ -377,7 +383,7 @@ XLogRegisterBufData(uint8 block_id, char *data, int len)
> > registered_buffer *regbuf;
> > XLogRecData *rdata;
> >
> > - Assert(begininsert_called);
> > + Assert(begininsert_called && len >= 0 && len <= UINT16_MAX);
> >
> > /* find the registered buffer struct */
> > regbuf = ®istered_buffers[block_id];
> > @@ -385,8 +391,14 @@ XLogRegisterBufData(uint8 block_id, char *data, int
> > len)
> > elog(ERROR, "no block with id %d registered with WAL
> > insertion",
> > block_id);
> >
> > - if (num_rdatas >= max_rdatas)
> > + /*
> > + * Check against max_rdatas; and ensure we don't register more data
> > per
> > + * buffer than can be handled by the physical record format.
> > + */
> > + if (unlikely(num_rdatas >= max_rdatas ||
> > + regbuf->rdata_len + len > UINT16_MAX))
> > elog(ERROR, "too much WAL data");
> > +
> > rdata = &rdatas[num_rdatas++];
>
> Given the repeated check it might be worth to just put it in a static inline
> used from the relevant places (which'd generate less code because the same
> line number would be used for all the checks).
The check itself is slightly different in those 3 places; but the
error message is shared. Do you mean to extract the elog() into a
static inline function (as attached), or did I misunderstand?
-Matthias
From 9e4b70ca096469616fd1320a02cbde129a4943b6 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <[email protected]>
Date: Fri, 11 Mar 2022 16:16:22 +0100
Subject: [PATCH v3] Add protections in xlog record APIs against large numbers
and overflows.
Before this; it was possible for an extension to create malicious WAL records
that were too large to replay; or that would overflow the xl_tot_len field,
causing potential corruption in WAL record IO ops.
---
src/backend/access/transam/xloginsert.c | 56 ++++++++++++++++++++-----
src/include/access/xloginsert.h | 4 +-
src/include/access/xlogrecord.h | 4 ++
3 files changed, 52 insertions(+), 12 deletions(-)
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index c260310c4c..e87642ccae 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -127,6 +127,17 @@ static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info,
bool *topxid_included);
static bool XLogCompressBackupBlock(char *page, uint16 hole_offset,
uint16 hole_length, char *dest, uint16 *dlen);
+static inline void XLogErrorDataLimitExceeded();
+
+/*
+ * Error due to exceeding the maximum size of a WAL record, or registering
+ * more datas than are being accounted for by the XLog infrastructure.
+ */
+inline void
+XLogErrorDataLimitExceeded()
+{
+ elog(ERROR, "too much WAL data");
+}
/*
* Begin constructing a WAL record. This must be called before the
@@ -334,14 +345,20 @@ XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
* XLogRecGetData().
*/
void
-XLogRegisterData(char *data, int len)
+XLogRegisterData(char *data, uint32 len)
{
XLogRecData *rdata;
- Assert(begininsert_called);
+ Assert(begininsert_called && AllocSizeIsValid(len));
+
+ /*
+ * Check against max_rdatas; and ensure we don't fill a record with
+ * more data than can be replayed
+ */
+ if (unlikely(num_rdatas >= max_rdatas) ||
+ unlikely(!AllocSizeIsValid((uint64) mainrdata_len + (uint64) len)))
+ XLogErrorDataLimitExceeded();
- if (num_rdatas >= max_rdatas)
- elog(ERROR, "too much WAL data");
rdata = &rdatas[num_rdatas++];
rdata->data = data;
@@ -372,12 +389,12 @@ XLogRegisterData(char *data, int len)
* limited)
*/
void
-XLogRegisterBufData(uint8 block_id, char *data, int len)
+XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
{
registered_buffer *regbuf;
XLogRecData *rdata;
- Assert(begininsert_called);
+ Assert(begininsert_called && len <= UINT16_MAX);
/* find the registered buffer struct */
regbuf = ®istered_buffers[block_id];
@@ -385,8 +402,14 @@ XLogRegisterBufData(uint8 block_id, char *data, int len)
elog(ERROR, "no block with id %d registered with WAL insertion",
block_id);
- if (num_rdatas >= max_rdatas)
- elog(ERROR, "too much WAL data");
+ /*
+ * Check against max_rdatas; and ensure we don't register more data per
+ * buffer than can be handled by the physical record format.
+ */
+ if (unlikely(num_rdatas >= max_rdatas) ||
+ unlikely(regbuf->rdata_len + len > UINT16_MAX))
+ XLogErrorDataLimitExceeded();
+
rdata = &rdatas[num_rdatas++];
rdata->data = data;
@@ -505,7 +528,7 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
XLogRecPtr *fpw_lsn, int *num_fpi, bool *topxid_included)
{
XLogRecData *rdt;
- uint32 total_len = 0;
+ uint64 total_len = 0;
int block_id;
pg_crc32c rdata_crc;
registered_buffer *prev_regbuf = NULL;
@@ -734,12 +757,18 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
if (needs_data)
{
+ /*
+ * When copying to XLogRecordBlockHeader, the length is narrowed
+ * to an uint16. We double-check that that is still correct.
+ */
+ Assert(regbuf->rdata_len <= UINT16_MAX);
+
/*
* Link the caller-supplied rdata chain for this buffer to the
* overall list.
*/
bkpb.fork_flags |= BKPBLOCK_HAS_DATA;
- bkpb.data_length = regbuf->rdata_len;
+ bkpb.data_length = (uint16) regbuf->rdata_len;
total_len += regbuf->rdata_len;
rdt_datas_last->next = regbuf->rdata_head;
@@ -836,6 +865,13 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
for (rdt = hdr_rdt.next; rdt != NULL; rdt = rdt->next)
COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
+ /*
+ * Ensure that xlogreader.c can read the record by ensuring that the
+ * data section of the WAL record can be allocated.
+ */
+ if (unlikely(!AllocSizeIsValid(total_len)))
+ XLogErrorDataLimitExceeded();
+
/*
* Fill in the fields in the record header. Prev-link is filled in later,
* once we know where in the WAL the record will be inserted. The CRC does
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h
index 5fc340c434..5ca0b0c2f0 100644
--- a/src/include/access/xloginsert.h
+++ b/src/include/access/xloginsert.h
@@ -43,12 +43,12 @@ extern void XLogBeginInsert(void);
extern void XLogSetRecordFlags(uint8 flags);
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);
extern void XLogEnsureRecordSpace(int max_block_id, int ndatas);
-extern void XLogRegisterData(char *data, int len);
+extern void XLogRegisterData(char *data, uint32 len);
extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags);
extern void XLogRegisterBlock(uint8 block_id, RelFileNode *rnode,
ForkNumber forknum, BlockNumber blknum, char *page,
uint8 flags);
-extern void XLogRegisterBufData(uint8 block_id, char *data, int len);
+extern void XLogRegisterBufData(uint8 block_id, char *data, uint32 len);
extern void XLogResetInsertion(void);
extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
index c1b1137aa7..950e1f22b0 100644
--- a/src/include/access/xlogrecord.h
+++ b/src/include/access/xlogrecord.h
@@ -37,6 +37,10 @@
* The XLogRecordBlockHeader, XLogRecordDataHeaderShort and
* XLogRecordDataHeaderLong structs all begin with a single 'id' byte. It's
* used to distinguish between block references, and the main data structs.
+ *
+ * Note that xlogreader.c limits the total size of the xl_tot_len to
+ * MaxAllocSize (1GB - 1). This means that this is also the maximum size
+ * of a single WAL record - we would be unable to replay any larger record.
*/
typedef struct XLogRecord
{
--
2.30.2