This is an automated email from the ASF dual-hosted git repository.
chenjinbao1989 pushed a commit to branch cbdb-postgres-merge
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/cbdb-postgres-merge by this push:
new d8b123ac745 Fixed some bugs introduced when resolving conflicts for xlog
d8b123ac745 is described below
commit d8b123ac7457d9d258f0db7c7f20446b887bdf75
Author: Jinbao Chen <[email protected]>
AuthorDate: Sat Dec 13 01:37:48 2025 +0800
Fixed some bugs introduced when resolving conflicts for xlog
---
src/backend/access/transam/xlog.c | 38 +++++++++++------
src/backend/access/transam/xlogreader.c | 2 -
src/backend/access/transam/xlogrecovery.c | 68 ++++++++++++++++++++++++++++++-
src/bin/initdb/initdb.c | 8 +---
4 files changed, 95 insertions(+), 21 deletions(-)
diff --git a/src/backend/access/transam/xlog.c
b/src/backend/access/transam/xlog.c
index 30eab6de880..f4d8116d7b1 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5434,7 +5434,29 @@ StartupXLOG(void)
ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
{
RemoveTempXlogFiles();
- SyncDataDirectory();
+ /*
+ * 1. If the backup_label file exists, we assume the pgdata has already
+ * been synchronized. This is true on gpdb since we do force fsync
+ * during pg_basebackup and pg_rewind.
+ *
+ * 2. else for the crash recovery case.
+ *
+ * 2.1. if full page writes is enabled, we do synchronize the wal
+ * files only. wal files must be synchronized here, else if xlog
+ * redo writes some buffer pages and those pages are partly
+ * synchronized, and then system crashes and some xlogs are lost,
+ * those table file pages might be broken.
+ *
+ * 2.2. else, simply synchronize the whole pgdata directory though
+ * there might be room for optimization but we would mostly not run
+ * into this code branch. Since we can not get
+ * checkPoint.fullPageWrites here so we do pgdata fsync later (
+ * i.e. call SyncDataDirectory()) after reading the checkpoint.
+ */
+ if (access(BACKUP_LABEL_FILE, F_OK) != 0)
+ SyncAllXLogFiles();
+ if (Gp_role == GP_ROLE_DISPATCH)
+ *shmCleanupBackends = true;
didCrash = true;
}
else
@@ -5455,8 +5477,11 @@ StartupXLOG(void)
/* initialize shared memory variables from the checkpoint record */
ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextGxid = checkPoint.nextGxid;
+ ShmemVariableCache->GxidCount = 0;
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
+ ShmemVariableCache->nextRelfilenode = checkPoint.nextRelfilenode;
+ ShmemVariableCache->relfilenodeCount = 0;
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@@ -5465,17 +5490,6 @@ StartupXLOG(void)
checkPoint.newestCommitTsXid);
XLogCtl->ckptFullXid = checkPoint.nextXid;
- /*
- * gpdb specific: Do pgdata fsync for the case that is almost not possible
- * on real production scenarios. See previous code that calls
- * SyncAllXLogFiles() for details.
- */
- if (!checkPoint.fullPageWrites &&
- !haveBackupLabel &&
- ControlFile->state != DB_SHUTDOWNED &&
- ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
- SyncDataDirectory();
-
/*
* Clear out any old relcache cache files. This is *necessary* if we do
* any WAL replay, since that would probably result in the cache files
diff --git a/src/backend/access/transam/xlogreader.c
b/src/backend/access/transam/xlogreader.c
index dce2522a3ce..c2db8299fdc 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -562,7 +562,6 @@ XLogDecodeNextRecord(XLogReaderState *state, bool
nonblocking)
state->errormsg_buf[0] = '\0';
decoded = NULL;
- ResetDecoder(state);
state->abortedRecPtr = InvalidXLogRecPtr;
state->missingContrecPtr = InvalidXLogRecPtr;
@@ -758,7 +757,6 @@ restart:
if (pageHeader->xlp_info &
XLP_FIRST_IS_OVERWRITE_CONTRECORD)
{
state->overwrittenRecPtr = RecPtr;
- ResetDecoder(state);
RecPtr = targetPagePtr;
goto restart;
}
diff --git a/src/backend/access/transam/xlogrecovery.c
b/src/backend/access/transam/xlogrecovery.c
index a98b5e41a5d..69187a969fb 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -816,6 +816,17 @@ InitWalRecovery(ControlFileData *ControlFile, bool
*wasShutdown_ptr,
wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) ==
XLOG_CHECKPOINT_SHUTDOWN);
}
+ /*
+ * gpdb specific: Do pgdata fsync for the case that is almost not possible
+ * on real production scenarios. See previous code that calls
+ * SyncAllXLogFiles() for details.
+ */
+ if (!checkPoint.fullPageWrites &&
+ !haveBackupLabel &&
+ ControlFile->state != DB_SHUTDOWNED &&
+ ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
+ SyncDataDirectory();
+
/*
* If the location of the checkpoint record is not on the expected
* timeline in the history of the requested timeline, we cannot proceed:
@@ -1962,6 +1973,32 @@ ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord
*record, TimeLineID *repl
XLogRecoveryCtl->lastReplayedTLI = *replayTLI;
SpinLockRelease(&XLogRecoveryCtl->info_lck);
+ if (create_restartpoint_on_ckpt_record_replay &&
ArchiveRecoveryRequested)
+ {
+ /*
+ * Create restartpoint on checkpoint record if requested.
+ *
+ * The bgwriter creates restartpoints during archive
+ * recovery at its own leisure. But gp_replica_check fails
+ * with this, because it bypasses the shared buffer cache
+ * and reads directly from disk. So, via GUC it can
+ * request to force creating restart point mainly to flush
+ * the shared buffers to disk.
+ */
+ uint8 xlogRecInfo = record->xl_info & ~XLR_INFO_MASK;
+
+ if (record->xl_rmid == RM_XLOG_ID &&
+ (xlogRecInfo == XLOG_CHECKPOINT_SHUTDOWN ||
+ xlogRecInfo == XLOG_CHECKPOINT_ONLINE))
+ {
+ if (ArchiveRecoveryRequested && IsUnderPostmaster)
+ RequestCheckpoint(CHECKPOINT_IMMEDIATE |
CHECKPOINT_WAIT);
+ else
+ elog(LOG, "Skipping CreateRestartPoint() as
bgwriter is not launched.");
+ }
+ }
+
+
/* ------
* Wakeup walsenders:
*
@@ -4034,6 +4071,11 @@ ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
XLogRecPtr RecPtr,
{
XLogRecord *record;
uint8 info;
+ bool sizeOk;
+ uint32 chkpt_len;
+ uint32 chkpt_hdr_len_short;
+ uint32 chkpt_hdr_len_long;
+ bool length_match;
Assert(xlogreader != NULL);
@@ -4067,7 +4109,31 @@ ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
XLogRecPtr RecPtr,
(errmsg("invalid xl_info in checkpoint
record")));
return NULL;
}
- if (record->xl_tot_len < SizeOfXLogRecord +
SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
+
+ /*
+ * GPDB: Verify the Checkpoint record length. For an extended Checkpoint
+ * record (when record total length is greater than regular checkpoint
+ * record total length, e.g. in the case of containing DTX info), compare
+ * the difference between the regular checkpoint size and the extended
+ * variable size.
+ */
+ sizeOk = false;
+ chkpt_len = XLogRecGetDataLen(xlogreader);
+ chkpt_hdr_len_short = SizeOfXLogRecord +
SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint);
+ chkpt_hdr_len_long = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderLong
+ sizeof(CheckPoint);
+
+ if (chkpt_len > 255) /* for XLR_BLOCK_ID_DATA_LONG */
+ length_match = ((chkpt_len - sizeof(CheckPoint)) ==
(record->xl_tot_len - chkpt_hdr_len_long));
+ else /* for XLR_BLOCK_ID_DATA_SHORT */
+ length_match = ((chkpt_len - sizeof(CheckPoint)) ==
(record->xl_tot_len - chkpt_hdr_len_short));
+
+ if ((chkpt_len == sizeof(CheckPoint) && record->xl_tot_len ==
chkpt_hdr_len_short) ||
+ ((chkpt_len > sizeof(CheckPoint) &&
+ record->xl_tot_len > chkpt_hdr_len_short &&
+ length_match)))
+ sizeOk = true;
+
+ if (!sizeOk)
{
ereport(PANIC,
(errmsg("invalid length of checkpoint
record")));
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 893965a2a2e..b6db630f38a 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2232,7 +2232,7 @@ make_template0(FILE *cmdfd)
/*
* Finally vacuum to clean up dead rows in pg_database
*/
- // PG_CMD_PUTS("VACUUM pg_database;\n\n");
+ PG_CMD_PUTS("VACUUM pg_database;\n\n");
}
/*
@@ -2248,10 +2248,6 @@ make_postgres(FILE *cmdfd)
PG_CMD_PUTS("CREATE DATABASE postgres OID = "
CppAsString2(PostgresDbOid)
" STRATEGY = file_copy;\n\n");
PG_CMD_PUTS("COMMENT ON DATABASE postgres IS 'default administrative
connection database';\n\n");
- PG_CMD_PUTS("UPDATE pg_database SET "
- " datistemplate = 't' "
- " WHERE datname = 'postgres';\n\n");
- //PG_CMD_PUTS("VACUUM FULL pg_database;\n\n");
}
/*
@@ -3433,7 +3429,7 @@ initialize_data_directory(void)
* vacuum template1 to remove the dead tuples. otherwise, some mismatch error
* will be reported in gp_replica_check.
*/
-// vacuum_db(cmdfd);
+ vacuum_db(cmdfd);
PG_CMD_CLOSE;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]