This is an automated email from the ASF dual-hosted git repository.

wangwn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


The following commit(s) were added to refs/heads/main by this push:
     new 92e337463f2 Feat: Enable hot DR cluster
92e337463f2 is described below

commit 92e337463f2246280a7b4a82086d830e4dea4b59
Author: WANG Weinan <[email protected]>
AuthorDate: Thu Jul 31 17:54:03 2025 +0800

    Feat: Enable hot DR cluster
    
    Most of the feature is already done by upstream, but the hot DR QD
    cannot organize cdbcomponent from the `gp_segment_configuration` rel.
    Define a boolean GUC named `hot_dr`; if `hot_dr` is enabled, read
    the cluster info from the segconf file.
---
 .github/workflows/build-cloudberry.yml             |  3 ++
 src/backend/access/transam/xlog.c                  | 10 ++++++
 src/backend/cdb/cdbutil.c                          | 40 +++++++++++++++++-----
 src/backend/utils/misc/guc_gp.c                    | 27 +++++++++++++++
 src/include/access/xlog.h                          |  1 +
 src/include/cdb/cdbutil.h                          |  1 +
 src/include/cdb/cdbvars.h                          |  1 +
 src/include/utils/unsync_guc_name.h                |  1 +
 .../isolation2/expected/hot_standby/faults.out     |  3 +-
 src/test/isolation2/sql/hot_standby/faults.sql     |  3 +-
 10 files changed, 80 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/build-cloudberry.yml 
b/.github/workflows/build-cloudberry.yml
index de702b5790d..fd2b9c73949 100644
--- a/.github/workflows/build-cloudberry.yml
+++ b/.github/workflows/build-cloudberry.yml
@@ -310,6 +310,9 @@ jobs:
               {"test":"ic-isolation2",
                "make_configs":["src/test/isolation2:installcheck-isolation2"]
               },
+              {"test":"ic-isolation2-hot-standby",
+               "make_configs":["src/test/isolation2:installcheck-hot-standby"]
+              },
               {"test":"ic-isolation2-crash",
                
"make_configs":["src/test/isolation2:installcheck-isolation2-crash"],
                "enable_core_check":false
diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index ffc8714cf62..034aeb6473b 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -114,6 +114,7 @@ int                 XLogArchiveTimeout = 0;
 int                    XLogArchiveMode = ARCHIVE_MODE_OFF;
 char      *XLogArchiveCommand = NULL;
 bool           EnableHotStandby = false;
+bool           EnableHotDR = false;
 bool           fullPageWrites = true;
 bool           wal_log_hints = false;
 bool           wal_compression = false;
@@ -7967,6 +7968,12 @@ StartupXLOG(void)
                                if (gp_pause_on_restore_point_replay)
                                        pauseRecoveryOnRestorePoint(xlogreader);
 
+                               /* Exit the recovery loop if a promotion is 
triggered in pauseRecoveryOnRestorePoint() */
+                               if (reachedContinuousRecoveryTarget && 
recoveryTargetAction == RECOVERY_TARGET_ACTION_PROMOTE){
+                                       reachedRecoveryTarget = true;
+                                       break;
+                               }
+
                                /* Exit loop if we reached inclusive recovery 
target */
                                if (recoveryStopsAfter(xlogreader))
                                {
@@ -10757,6 +10764,9 @@ XLogRestorePoint(const char *rpName)
        xlrec.rp_time = GetCurrentTimestamp();
        strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
 
+       /* LogHotStandby for the restore here */
+       LogStandbySnapshot();
+
        XLogBeginInsert();
        XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
 
diff --git a/src/backend/cdb/cdbutil.c b/src/backend/cdb/cdbutil.c
index a241549662d..fbf3f8900f2 100644
--- a/src/backend/cdb/cdbutil.c
+++ b/src/backend/cdb/cdbutil.c
@@ -92,6 +92,7 @@ static int    CdbComponentDatabaseInfoCompare(const void *p1, 
const void *p2);
 
 static GpSegConfigEntry * readGpSegConfigFromCatalog(int *total_dbs);
 static GpSegConfigEntry * readGpSegConfigFromFTSFiles(int *total_dbs);
+static GpSegConfigEntry * readGpSegConfigFromFiles(int *total_dbs);
 
 static void getAddressesForDBid(GpSegConfigEntry *c, int elevel);
 static HTAB *hostPrimaryCountHashTableInit(void);
@@ -131,6 +132,15 @@ typedef struct HostPrimaryCountEntry
  */
 static GpSegConfigEntry *
 readGpSegConfigFromFTSFiles(int *total_dbs)
+{
+       Assert(!IsTransactionState() && !IS_HOT_DR_CLUSTER());
+       /* notify and wait FTS to finish a probe and update the dump file */
+       FtsNotifyProber();
+       return readGpSegConfigFromFiles(total_dbs);
+}
+
+static GpSegConfigEntry *
+readGpSegConfigFromFiles(int *total_dbs)
 {
        FILE    *fd;
        int             idx = 0;
@@ -142,11 +152,6 @@ readGpSegConfigFromFTSFiles(int *total_dbs)
        char    address[MAXHOSTNAMELEN];
        char    buf[MAXHOSTNAMELEN * 2 + 32];
 
-       Assert(!IsTransactionState());
-
-       /* notify and wait FTS to finish a probe and update the dump file */
-       FtsNotifyProber();      
-
        fd = AllocateFile(GPSEGCONFIGDUMPFILE, "r");
 
        if (!fd)
@@ -188,6 +193,18 @@ readGpSegConfigFromFTSFiles(int *total_dbs)
        return configs;
 }
 
+bool
+checkGpSegConfigFtsFiles()
+{
+       FILE *fd = AllocateFile(GPSEGCONFIGDUMPFILE, "r");
+
+       if (!fd)
+               return false;
+
+       FreeFile(fd);
+       return true;
+}
+
 /*
  * writeGpSegConfigToFTSFiles() dump gp_segment_configuration to the file
  * GPSEGCONFIGDUMPFILE, in $PGDATA, only FTS process can use this function.
@@ -372,10 +389,17 @@ getCdbComponentInfo(void)
 
        HTAB       *hostPrimaryCountHash = hostPrimaryCountHashTableInit();
 
-       if (IsTransactionState())
-               configs = readGpSegConfigFromCatalog(&total_dbs);
+       if (EnableHotDR)
+       {
+               configs = readGpSegConfigFromFiles(&total_dbs);
+       }
        else
-               configs = readGpSegConfigFromFTSFiles(&total_dbs);
+       {
+               if (IsTransactionState())
+                       configs = readGpSegConfigFromCatalog(&total_dbs);
+               else
+                       configs = readGpSegConfigFromFTSFiles(&total_dbs);
+       }
 
        component_databases = palloc0(sizeof(CdbComponentDatabases));
 
diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c
index c7bb596cb61..2c373e5a582 100644
--- a/src/backend/utils/misc/guc_gp.c
+++ b/src/backend/utils/misc/guc_gp.c
@@ -86,6 +86,7 @@ static bool check_optimizer(bool *newval, void **extra, 
GucSource source);
 static bool check_verify_gpfdists_cert(bool *newval, void **extra, GucSource 
source);
 static bool check_dispatch_log_stats(bool *newval, void **extra, GucSource 
source);
 static bool check_gp_workfile_compression(bool *newval, void **extra, 
GucSource source);
+static bool check_hot_dr(bool *newval, void **extra, GucSource source);
 
 /* Helper function for guc setter */
 bool gpvars_check_gp_resqueue_priority_default_value(char **newval,
@@ -3331,6 +3332,16 @@ struct config_bool ConfigureNamesBool_gp[] =
                NULL, NULL, NULL
        },
 
+       {
+               {"hot_dr", PGC_POSTMASTER, REPLICATION_STANDBY,
+                       gettext_noop("DR Cluster as well as allows connteions 
and queries"),
+                       NULL
+               },
+               &EnableHotDR,
+               false,
+               check_hot_dr, NULL, NULL
+       },
+
        {
                {"gp_enable_runtime_filter_pushdown", PGC_USERSET, 
DEVELOPER_OPTIONS,
                        gettext_noop("Try to push the hash table of hash join 
to the seqscan or AM as bloom filter."),
@@ -5455,6 +5466,22 @@ check_verify_gpfdists_cert(bool *newval, void **extra, 
GucSource source)
        return true;
 }
 
+static bool
+check_hot_dr(bool *newval, void **extra, GucSource source)
+{
+       if (*newval && !EnableHotStandby)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("cannot enable \"hot_dr\" when 
\"hot_standby\" is false")));
+
+       if (*newval && IS_QUERY_DISPATCHER() && !checkGpSegConfigFtsFiles())
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("cannot enable \"hot_dr\" since DR 
cluster segment configuration file does not exits")));
+
+       return true;
+}
+
 static bool
 check_dispatch_log_stats(bool *newval, void **extra, GucSource source)
 {
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index e8a73ceb201..6d1cc151ed2 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -123,6 +123,7 @@ extern int  XLogArchiveTimeout;
 extern int     wal_retrieve_retry_interval;
 extern char *XLogArchiveCommand;
 extern bool EnableHotStandby;
+extern bool EnableHotDR;
 
 extern bool fullPageWrites;
 extern bool wal_log_hints;
diff --git a/src/include/cdb/cdbutil.h b/src/include/cdb/cdbutil.h
index 22c3cc782d8..0f638bbd521 100644
--- a/src/include/cdb/cdbutil.h
+++ b/src/include/cdb/cdbutil.h
@@ -132,6 +132,7 @@ extern char *getDnsAddress(char *name, int port, int 
elevel);
 
 #ifdef USE_INTERNAL_FTS
 extern void writeGpSegConfigToFTSFiles(void);
+extern bool checkGpSegConfigFtsFiles(void);
 #else
 
 GpSegConfigEntry * readGpSegConfig(char * buff, int *total_dbs);
diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h
index 2393384ec3a..534f957978d 100644
--- a/src/include/cdb/cdbvars.h
+++ b/src/include/cdb/cdbvars.h
@@ -757,6 +757,7 @@ extern GpId GpIdentity;
 #define MAX_DBID_STRING_LENGTH  11
 
 #define UNINITIALIZED_GP_IDENTITY_VALUE (-10000)
+#define IS_HOT_DR_CLUSTER() (EnableHotDR)
 #define IS_QUERY_DISPATCHER() (GpIdentity.segindex == MASTER_CONTENT_ID)
 #define IS_HOT_STANDBY_QD() (EnableHotStandby && IS_QUERY_DISPATCHER() && 
RecoveryInProgress())
 
diff --git a/src/include/utils/unsync_guc_name.h 
b/src/include/utils/unsync_guc_name.h
index b26c5b43c7b..37f629e6e97 100644
--- a/src/include/utils/unsync_guc_name.h
+++ b/src/include/utils/unsync_guc_name.h
@@ -294,6 +294,7 @@
                "gp_workfile_limit_per_segment",
                "gp_workfile_max_entries",
                "hba_file",
+               "hot_dr",
                "hot_standby",
                "hot_standby_feedback",
                "huge_pages",
diff --git a/src/test/isolation2/expected/hot_standby/faults.out 
b/src/test/isolation2/expected/hot_standby/faults.out
index 39f3a06cca6..2eb16b37229 100644
--- a/src/test/isolation2/expected/hot_standby/faults.out
+++ b/src/test/isolation2/expected/hot_standby/faults.out
@@ -133,7 +133,7 @@ select gp_inject_fault('out_of_recovery_in_startupxlog', 
'reset', dbid) from gp_
 ERROR:  primary segments can only process MPP protocol messages from primary 
QD  (seg1 slice1 127.0.1.1:7006 pid=14671)
 HINT:  Exit the current session and re-connect.
 -1Sq: ... <quitting>
-
+-- start_ignore
 -- will fail due to downed mirror (previous primary)
 -1S: select * from hs_failover;
 ERROR:  failed to acquire resources on one or more segments
@@ -141,6 +141,7 @@ DETAIL:  connection to server at "10.13.9.74", port 7003 
failed: Connection refu
        Is the server running on that host and accepting TCP/IP connections?
  (seg1 10.13.9.74:7003)
 -1Sq: ... <quitting>
+-- end_ignore
 
 -- bring the downed mirror up
 !\retcode gprecoverseg -aF;
diff --git a/src/test/isolation2/sql/hot_standby/faults.sql 
b/src/test/isolation2/sql/hot_standby/faults.sql
index 6e25bcba272..b1be240916a 100644
--- a/src/test/isolation2/sql/hot_standby/faults.sql
+++ b/src/test/isolation2/sql/hot_standby/faults.sql
@@ -59,10 +59,11 @@ select gp_inject_fault('out_of_recovery_in_startupxlog', 
'reset', dbid) from gp_
 -- in an existing gang. That mirror is now a primary, so it will complain and 
the query fails.
 -1S: select * from hs_failover;
 -1Sq:
-
+-- start_ignore
 -- will fail due to downed mirror (previous primary)
 -1S: select * from hs_failover;
 -1Sq:
+-- end_ignore
 
 -- bring the downed mirror up
 !\retcode gprecoverseg -aF;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to