(cloudberry) branch cbdb-postgres-merge updated: Fixed some bugs introduced when resolving conflicts for xlog

chenjinbao1989 Fri, 12 Dec 2025 09:39:13 -0800

This is an automated email from the ASF dual-hosted git repository.

chenjinbao1989 pushed a commit to branch cbdb-postgres-merge
in repository https://gitbox.apache.org/repos/asf/cloudberry.git



The following commit(s) were added to refs/heads/cbdb-postgres-merge by this push:
     new d8b123ac745 Fixed some bugs introduced when resolving conflicts for xlog
d8b123ac745 is described below

commit d8b123ac7457d9d258f0db7c7f20446b887bdf75
Author: Jinbao Chen <[email protected]>
AuthorDate: Sat Dec 13 01:37:48 2025 +0800

    Fixed some bugs introduced when resolving conflicts for xlog
---
 src/backend/access/transam/xlog.c         | 38 +++++++++++------
 src/backend/access/transam/xlogreader.c   |  2 -
 src/backend/access/transam/xlogrecovery.c | 68 ++++++++++++++++++++++++++++++-
 src/bin/initdb/initdb.c                   |  8 +---
 4 files changed, 95 insertions(+), 21 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 30eab6de880..f4d8116d7b1 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5434,7 +5434,29 @@ StartupXLOG(void)
                ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
        {
                RemoveTempXlogFiles();
-               SyncDataDirectory();
+               /*
+                * 1. If the backup_label file exists, we assume the pgdata has already
+                * been synchronized. This is true on gpdb since we do force fsync
+                * during pg_basebackup and pg_rewind.
+                *
+                * 2. else for the crash recovery case.
+                *
+                *    2.1. if full page writes is enabled, we do synchronize the wal
+                *    files only. wal files must be synchronized here, else if xlog
+                *    redo writes some buffer pages and those pages are partly
+                *    synchronized, and then system crashes and some xlogs are lost,
+                *    those table file pages might be broken.
+                *
+                *    2.2. else, simply synchronize the whole pgdata directory though
+                *    there might be room for optimization but we would mostly not run
+                *    into this code branch. Since we can not get
+                *    checkPoint.fullPageWrites here so we do pgdata fsync later (
+                *    i.e. call SyncDataDirectory()) after reading the checkpoint.
+                */
+               if (access(BACKUP_LABEL_FILE, F_OK) != 0)
+                       SyncAllXLogFiles();
+               if (Gp_role == GP_ROLE_DISPATCH)
+                       *shmCleanupBackends = true;
                didCrash = true;
        }
        else
@@ -5455,8 +5477,11 @@ StartupXLOG(void)
        /* initialize shared memory variables from the checkpoint record */
        ShmemVariableCache->nextXid = checkPoint.nextXid;
        ShmemVariableCache->nextGxid = checkPoint.nextGxid;
+       ShmemVariableCache->GxidCount = 0;
        ShmemVariableCache->nextOid = checkPoint.nextOid;
        ShmemVariableCache->oidCount = 0;
+       ShmemVariableCache->nextRelfilenode = checkPoint.nextRelfilenode;
+       ShmemVariableCache->relfilenodeCount = 0;
        MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
        AdvanceOldestClogXid(checkPoint.oldestXid);
        SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@@ -5465,17 +5490,6 @@ StartupXLOG(void)
                                         checkPoint.newestCommitTsXid);
        XLogCtl->ckptFullXid = checkPoint.nextXid;
 
-       /*
-        * gpdb specific: Do pgdata fsync for the case that is almost not possible
-        * on real production scenarios. See previous code that calls
-        * SyncAllXLogFiles() for details.
-        */
-       if (!checkPoint.fullPageWrites &&
-               !haveBackupLabel &&
-               ControlFile->state != DB_SHUTDOWNED &&
-               ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
-               SyncDataDirectory();
-
        /*
         * Clear out any old relcache cache files.  This is *necessary* if we do
         * any WAL replay, since that would probably result in the cache files
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index dce2522a3ce..c2db8299fdc 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -562,7 +562,6 @@ XLogDecodeNextRecord(XLogReaderState *state, bool nonblocking)
        state->errormsg_buf[0] = '\0';
        decoded = NULL;
 
-       ResetDecoder(state);
        state->abortedRecPtr = InvalidXLogRecPtr;
        state->missingContrecPtr = InvalidXLogRecPtr;
 
@@ -758,7 +757,6 @@ restart:
                        if (pageHeader->xlp_info & XLP_FIRST_IS_OVERWRITE_CONTRECORD)
                        {
                                state->overwrittenRecPtr = RecPtr;
-                               ResetDecoder(state);
                                RecPtr = targetPagePtr;
                                goto restart;
                        }
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index a98b5e41a5d..69187a969fb 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -816,6 +816,17 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
                wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
        }
 
+       /*
+        * gpdb specific: Do pgdata fsync for the case that is almost not possible
+        * on real production scenarios. See previous code that calls
+        * SyncAllXLogFiles() for details.
+        */
+       if (!checkPoint.fullPageWrites &&
+               !haveBackupLabel &&
+               ControlFile->state != DB_SHUTDOWNED &&
+               ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
+               SyncDataDirectory();
+
        /*
         * If the location of the checkpoint record is not on the expected
         * timeline in the history of the requested timeline, we cannot proceed:
@@ -1962,6 +1973,32 @@ ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, TimeLineID *repl
        XLogRecoveryCtl->lastReplayedTLI = *replayTLI;
        SpinLockRelease(&XLogRecoveryCtl->info_lck);
 
+       if (create_restartpoint_on_ckpt_record_replay && ArchiveRecoveryRequested)
+       {
+               /*
+                * Create restartpoint on checkpoint record if requested.
+                *
+                * The bgwriter creates restartpoints during archive
+                * recovery at its own leisure. But gp_replica_check fails
+                * with this, because it bypasses the shared buffer cache
+                * and reads directly from disk. So, via GUC it can
+                * request to force creating restart point mainly to flush
+                * the shared buffers to disk.
+                */
+               uint8 xlogRecInfo = record->xl_info & ~XLR_INFO_MASK;
+
+               if (record->xl_rmid == RM_XLOG_ID &&
+                       (xlogRecInfo == XLOG_CHECKPOINT_SHUTDOWN ||
+                        xlogRecInfo == XLOG_CHECKPOINT_ONLINE))
+               {
+                       if (ArchiveRecoveryRequested && IsUnderPostmaster)
+                               RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_WAIT);
+                       else
+                               elog(LOG, "Skipping CreateRestartPoint() as bgwriter is not launched.");
+               }
+       }
+
+
        /* ------
         * Wakeup walsenders:
         *
@@ -4034,6 +4071,11 @@ ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher, XLogRecPtr RecPtr,
 {
        XLogRecord *record;
        uint8           info;
+       bool sizeOk;
+       uint32 chkpt_len;
+       uint32 chkpt_hdr_len_short;
+       uint32 chkpt_hdr_len_long;
+       bool length_match;
 
        Assert(xlogreader != NULL);
 
@@ -4067,7 +4109,31 @@ ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher, XLogRecPtr RecPtr,
                                (errmsg("invalid xl_info in checkpoint record")));
                return NULL;
        }
-       if (record->xl_tot_len < SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
+
+       /*
+        * GPDB: Verify the Checkpoint record length. For an extended Checkpoint
+        * record (when record total length is greater than regular checkpoint
+        * record total length, e.g. in the case of containing DTX info), compare
+        * the difference between the regular checkpoint size and the extended
+        * variable size.
+        */
+       sizeOk = false;
+       chkpt_len = XLogRecGetDataLen(xlogreader);
+       chkpt_hdr_len_short = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint);
+       chkpt_hdr_len_long = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderLong + sizeof(CheckPoint);
+
+       if (chkpt_len > 255) /* for XLR_BLOCK_ID_DATA_LONG */
+               length_match = ((chkpt_len - sizeof(CheckPoint)) == (record->xl_tot_len - chkpt_hdr_len_long));
+       else /* for XLR_BLOCK_ID_DATA_SHORT */
+               length_match = ((chkpt_len - sizeof(CheckPoint)) == (record->xl_tot_len - chkpt_hdr_len_short));
+
+       if ((chkpt_len == sizeof(CheckPoint) && record->xl_tot_len == chkpt_hdr_len_short) ||
+               ((chkpt_len > sizeof(CheckPoint) &&
+                 record->xl_tot_len > chkpt_hdr_len_short &&
+                 length_match)))
+               sizeOk = true;
+
+       if (!sizeOk)
        {
                ereport(PANIC,
                                (errmsg("invalid length of checkpoint record")));
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 893965a2a2e..b6db630f38a 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2232,7 +2232,7 @@ make_template0(FILE *cmdfd)
        /*
         * Finally vacuum to clean up dead rows in pg_database
         */
-       // PG_CMD_PUTS("VACUUM pg_database;\n\n");
+        PG_CMD_PUTS("VACUUM pg_database;\n\n");
 }
 
 /*
@@ -2248,10 +2248,6 @@ make_postgres(FILE *cmdfd)
        PG_CMD_PUTS("CREATE DATABASE postgres OID = " CppAsString2(PostgresDbOid)
                                " STRATEGY = file_copy;\n\n");
        PG_CMD_PUTS("COMMENT ON DATABASE postgres IS 'default administrative connection database';\n\n");
-       PG_CMD_PUTS("UPDATE pg_database SET "
-                               "       datistemplate = 't' "
-                               "    WHERE datname = 'postgres';\n\n");
-       //PG_CMD_PUTS("VACUUM FULL pg_database;\n\n");
 }
 
 /*
@@ -3433,7 +3429,7 @@ initialize_data_directory(void)
         * vacuum template1 to remove the dead tuples. otherwise, some mismatch error 
         * will be reported in gp_replica_check.
         */
-//     vacuum_db(cmdfd);
+       vacuum_db(cmdfd);
 
        PG_CMD_CLOSE;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) branch cbdb-postgres-merge updated: Fixed some bugs introduced when resolving conflicts for xlog

Reply via email to