This is an automated email from the ASF dual-hosted git repository.

djwang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry-backup.git


The following commit(s) were added to refs/heads/main by this push:
     new d2ea4491 CI: refactor CI workflow for cloudberry-backup (#69)
d2ea4491 is described below

commit d2ea4491bc9338ead71999b2dc66e7a1c5bfa571
Author: Dianjin Wang <[email protected]>
AuthorDate: Tue Mar 3 15:57:16 2026 +0800

    CI: refactor CI workflow for cloudberry-backup (#69)
    
    - build Cloudberry from source and publish the install as an artifact
    - run unit/integration/end_to_end/s3_plugin/scale tests in parallel jobs
    - add Apache header to the workflow file
    - capture logs and upload artifacts on failure with a test summary
    
    Reviewed by Robert Mu <[email protected]>
---
 .github/workflows/cloudberry-backup-ci.yml       | 502 +++++++++++++++++++++++
 .github/workflows/scale-tests-cloudberry-ci.bash | 175 ++++++++
 gpbackup_s3_plugin.go                            |   2 +-
 plugins/generate_minio_config.sh                 |   2 +-
 plugins/plugin_test.sh                           |   4 +-
 plugins/s3plugin/s3plugin.go                     |  10 +-
 utils/agent_remote.go                            |   7 +-
 utils/plugin.go                                  |  20 +-
 utils/util.go                                    |  19 +
 9 files changed, 723 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/cloudberry-backup-ci.yml 
b/.github/workflows/cloudberry-backup-ci.yml
new file mode 100644
index 00000000..b4502e5d
--- /dev/null
+++ b/.github/workflows/cloudberry-backup-ci.yml
@@ -0,0 +1,502 @@
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+# GitHub Actions Workflow: Cloudberry-backup CI
+# --------------------------------------------------------------------
+# Description:
+#
+#   Builds Apache Cloudberry from source, packages the installation,
+#   and runs Cloudberry-backup tests against a demo Cloudberry cluster.
+#
+# Workflow Overview:
+# 1. Build Cloudberry from source and upload the installation as an artifact.
+# 2. For each test target (unit, integration, end_to_end, s3_plugin_e2e,
+#    regression, scale), restore the install, create a demo cluster, run tests.
+#
+# Notes:
+# - Each test job runs in an isolated environment and creates its own demo 
cluster.
+# - Artifacts are used to avoid rebuilding Cloudberry for every test target.
+# --------------------------------------------------------------------
+
+name: cloudberry-backup-ci
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+    types: [ opened, synchronize, reopened, edited, ready_for_review ]
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: false
+
+permissions:
+  contents: read
+
+env:
+  CLOUDBERRY_REPO: apache/cloudberry
+  CLOUDBERRY_REF: main
+  CLOUDBERRY_DIR: cloudberry
+  CLOUDBERRY_BACKUP_DIR: cloudberry-backup
+
+jobs:
+  build_cloudberry:
+    name: Build Cloudberry From Source
+    runs-on: ubuntu-22.04
+    timeout-minutes: 180
+    container:
+      image: apache/incubator-cloudberry:cbdb-build-rocky9-latest
+      options: >-
+        --user root
+        -h cdw
+        -v /usr/share:/host_usr_share
+        -v /usr/local:/host_usr_local
+        -v /opt:/host_opt
+
+    steps:
+      - name: Free Disk Space
+        run: |
+          set -euo pipefail
+          echo "=== Disk space before cleanup ==="
+          df -h /
+
+          rm -rf /host_opt/hostedtoolcache || true
+          rm -rf /host_usr_local/lib/android || true
+          rm -rf /host_usr_share/dotnet || true
+          rm -rf /host_opt/ghc || true
+          rm -rf /host_usr_local/.ghcup || true
+          rm -rf /host_usr_share/swift || true
+          rm -rf /host_usr_local/share/powershell || true
+          rm -rf /host_usr_local/share/chromium || true
+          rm -rf /host_usr_share/miniconda || true
+          rm -rf /host_opt/az || true
+          rm -rf /host_usr_share/sbt || true
+
+          echo "=== Disk space after cleanup ==="
+          df -h /
+
+      - name: Checkout Cloudberry Source
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ env.CLOUDBERRY_REPO }}
+          ref: ${{ env.CLOUDBERRY_REF }}
+          fetch-depth: 1
+          submodules: true
+          path: ${{ env.CLOUDBERRY_DIR }}
+
+      - name: Cloudberry Environment Initialization
+        env:
+          SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }}
+        run: |
+          set -euo pipefail
+          if ! su - gpadmin -c "/tmp/init_system.sh"; then
+            echo "::error::Container initialization failed"
+            exit 1
+          fi
+
+          mkdir -p "${SRC_DIR}/build-logs/details"
+          chown -R gpadmin:gpadmin .
+          chmod -R 755 .
+          chmod 777 "${SRC_DIR}/build-logs"
+          df -h /
+          rm -rf /__t/*
+          df -h /
+
+      - name: Configure Cloudberry
+        env:
+          SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }}
+        run: |
+          set -euo pipefail
+          chmod +x 
"${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"
+          if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} 
${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; 
then
+            echo "::error::Configure script failed"
+            exit 1
+          fi
+
+      - name: Build Cloudberry
+        env:
+          SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }}
+        run: |
+          set -euo pipefail
+          chmod +x 
"${SRC_DIR}/devops/build/automation/cloudberry/scripts/build-cloudberry.sh"
+          if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} 
${SRC_DIR}/devops/build/automation/cloudberry/scripts/build-cloudberry.sh"; then
+            echo "::error::Build script failed"
+            exit 1
+          fi
+
+      - name: Package Cloudberry Source
+        env:
+          SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }}
+        run: |
+          set -euo pipefail
+          tar -C "${GITHUB_WORKSPACE}" -czf cloudberry-src.tgz 
"${CLOUDBERRY_DIR}"
+
+      - name: Package Cloudberry Installation
+        run: |
+          set -euo pipefail
+          tar -C /usr/local -czf cloudberry-db.tgz cloudberry-db
+
+      - name: Upload Cloudberry Installation
+        uses: actions/upload-artifact@v4
+        with:
+          name: cloudberry-db-install
+          path: cloudberry-db.tgz
+          if-no-files-found: error
+          retention-days: 7
+
+      - name: Upload Cloudberry Source
+        uses: actions/upload-artifact@v4
+        with:
+          name: cloudberry-source
+          path: cloudberry-src.tgz
+          if-no-files-found: error
+          retention-days: 7
+
+  test_cloudberry_backup:
+    name: Cloudberry-backup Tests (${{ matrix.test_target }})
+    needs: [build_cloudberry]
+    runs-on: ubuntu-22.04
+    timeout-minutes: 180
+    container:
+      image: apache/incubator-cloudberry:cbdb-build-rocky9-latest
+      options: >-
+        --user root
+        -h cdw
+        -v /usr/share:/host_usr_share
+        -v /usr/local:/host_usr_local
+        -v /opt:/host_opt
+
+    strategy:
+      fail-fast: false
+      matrix:
+        test_target: [unit, integration, end_to_end, s3_plugin_e2e, 
regression, scale]
+
+    steps:
+      - name: Free Disk Space
+        run: |
+          set -euo pipefail
+          echo "=== Disk space before cleanup ==="
+          df -h /
+
+          rm -rf /host_opt/hostedtoolcache || true
+          rm -rf /host_usr_local/lib/android || true
+          rm -rf /host_usr_share/dotnet || true
+          rm -rf /host_opt/ghc || true
+          rm -rf /host_usr_local/.ghcup || true
+          rm -rf /host_usr_share/swift || true
+          rm -rf /host_usr_local/share/powershell || true
+          rm -rf /host_usr_local/share/chromium || true
+          rm -rf /host_usr_share/miniconda || true
+          rm -rf /host_opt/az || true
+          rm -rf /host_usr_share/sbt || true
+
+          echo "=== Disk space after cleanup ==="
+          df -h /
+
+      - name: Checkout Cloudberry-backup
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          path: ${{ env.CLOUDBERRY_BACKUP_DIR }}
+
+      - name: Download Cloudberry Installation
+        uses: actions/download-artifact@v4
+        with:
+          name: cloudberry-db-install
+
+      - name: Download Cloudberry Source
+        uses: actions/download-artifact@v4
+        with:
+          name: cloudberry-source
+
+      - name: Restore Cloudberry Installation
+        run: |
+          set -euo pipefail
+          tar -C /usr/local -xzf cloudberry-db.tgz
+
+      - name: Restore Cloudberry Source
+        run: |
+          set -euo pipefail
+          tar -C "${GITHUB_WORKSPACE}" -xzf cloudberry-src.tgz
+
+      - name: Cloudberry Environment Initialization
+        env:
+          SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }}
+        run: |
+          set -euo pipefail
+          if ! su - gpadmin -c "/tmp/init_system.sh"; then
+            echo "::error::Container initialization failed"
+            exit 1
+          fi
+
+          mkdir -p "${SRC_DIR}/build-logs/details"
+          chown -R gpadmin:gpadmin .
+          chmod -R 755 .
+          chmod 777 "${SRC_DIR}/build-logs"
+          df -h /
+          rm -rf /__t/*
+          df -h /
+
+      - name: Setup Locale for Integration Tests
+        run: |
+          # Install German locale and recompile de_DE with UTF-8 encoding 
BEFORE
+          # the cluster starts. PostgreSQL memory-maps the locale archive at
+          # startup, so localedef must run before any PG process is launched.
+          dnf install -y glibc-langpack-de
+          localedef -i de_DE -f UTF-8 de_DE
+
+      - name: Create Cloudberry Demo Cluster
+        env:
+          SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }}
+        run: |
+          set -euo pipefail
+          chmod +x 
"${SRC_DIR}/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh"
+          if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} 
${SRC_DIR}/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh";
 then
+            echo "::error::Demo cluster creation failed"
+            exit 1
+          fi
+
+      - name: Cloudberry-backup Tests
+        env:
+          CLOUDBERRY_BACKUP_SRC: ${{ github.workspace }}/${{ 
env.CLOUDBERRY_BACKUP_DIR }}
+          CLOUDBERRY_SRC: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }}
+          TEST_TARGET: ${{ matrix.test_target }}
+        run: |
+          set -euo pipefail
+          TEST_LOG_ROOT="${GITHUB_WORKSPACE}/test-logs/${TEST_TARGET}"
+          mkdir -p "${TEST_LOG_ROOT}"
+          chown -R gpadmin:gpadmin "${TEST_LOG_ROOT}"
+
+          cat <<'SCRIPT' > /tmp/run_cloudberry_backup_tests.sh
+          #!/bin/bash
+          set -euo pipefail
+
+          export GOPATH=/home/gpadmin/go
+          export PATH=/usr/local/go/bin:${GOPATH}/bin:${PATH}
+
+          source /usr/local/cloudberry-db/cloudberry-env.sh
+          source ${CLOUDBERRY_SRC}/gpAux/gpdemo/gpdemo-env.sh
+
+          pushd ${CLOUDBERRY_BACKUP_SRC}
+            make depend 2>&1 | tee 
"${TEST_LOG_ROOT}/cloudberry-backup-depend.log"
+            make build 2>&1 | tee 
"${TEST_LOG_ROOT}/cloudberry-backup-build.log"
+            make install 2>&1 | tee 
"${TEST_LOG_ROOT}/cloudberry-backup-install.log"
+
+            dummy_dir=$(find ${CLOUDBERRY_SRC} -name dummy_seclabel -type d | 
head -n 1 || true)
+            if [ -n "${dummy_dir}" ]; then
+              pushd "${dummy_dir}"
+                make install
+              popd
+              gpconfig -c shared_preload_libraries -v dummy_seclabel
+              gpstop -ra
+              gpconfig -s shared_preload_libraries | grep dummy_seclabel
+            else
+              echo "dummy_seclabel not found, skipping preload setup"
+            fi
+
+            psql postgres -c 'DROP TABLESPACE IF EXISTS test_tablespace'
+
+            set +e
+            case "${TEST_TARGET}" in
+              unit)
+                make unit 2>&1 | tee 
"${TEST_LOG_ROOT}/cloudberry-backup-unit.log"
+                ;;
+              integration)
+                make integration 2>&1 | tee 
"${TEST_LOG_ROOT}/cloudberry-backup-integration.log"
+                ;;
+              end_to_end)
+                make end_to_end 2>&1 | tee 
"${TEST_LOG_ROOT}/cloudberry-backup-end_to_end.log"
+                ;;
+              s3_plugin_e2e)
+                curl -fsSL 
https://dl.min.io/server/minio/release/linux-amd64/minio -o /tmp/minio
+                chmod +x /tmp/minio
+                mkdir -p /tmp/minio-data
+                /tmp/minio server --address ":9000" /tmp/minio-data > 
"${TEST_LOG_ROOT}/minio.log" 2>&1 &
+                for i in {1..30}; do
+                  if curl -fsS http://127.0.0.1:9000/minio/health/live 
>/dev/null; then
+                    break
+                  fi
+                  sleep 1
+                done
+
+                curl -fsSL https://dl.min.io/client/mc/release/linux-amd64/mc 
-o /tmp/mc
+                chmod +x /tmp/mc
+                /tmp/mc alias set local http://127.0.0.1:9000 minioadmin 
minioadmin
+                /tmp/mc mb --ignore-existing local/cloudberry-backup-s3-test
+
+                ${CLOUDBERRY_BACKUP_SRC}/plugins/generate_minio_config.sh
+                ${CLOUDBERRY_BACKUP_SRC}/plugins/plugin_test.sh 
"${GPHOME}/bin/gpbackup_s3_plugin" /tmp/minio_config.yaml 2>&1 | tee 
"${TEST_LOG_ROOT}/cloudberry-backup-s3-plugin-commands.log"
+
+                # Start test
+                test_db=plugin_test_db_ci
+                
backup_log="${TEST_LOG_ROOT}/cloudberry-backup-s3-plugin-gpbackup.log"
+                
restore_log="${TEST_LOG_ROOT}/cloudberry-backup-s3-plugin-gprestore.log"
+                psql -X -d postgres -qc "DROP DATABASE IF EXISTS ${test_db}" 
2>/dev/null || true
+                createdb "${test_db}"
+                psql -X -d "${test_db}" -qc "CREATE TABLE test1(i int) 
DISTRIBUTED RANDOMLY; INSERT INTO test1 SELECT generate_series(1,1000)"
+
+                # NOTE(review): $! still refers to the minio server backgrounded above
+                # (no other background jobs in between); safer to capture it immediately
+                # after launch.
+                minio_pid=$!
+                
+                gpbackup --dbname "${test_db}" --metadata-only --plugin-config 
/tmp/minio_config.yaml > "${backup_log}" 2>&1
+                timestamp=$(grep -E "Backup Timestamp[[:space:]]*=" 
"${backup_log}" | grep -Eo "[[:digit:]]{14}" | head -n 1)
+                if [ -z "${timestamp}" ]; then
+                  latest_gpbackup_log=$(ls -1t 
"${HOME}/gpAdminLogs"/gpbackup_*.log 2>/dev/null | head -n 1 || true)
+                  if [ -n "${latest_gpbackup_log}" ]; then
+                    timestamp=$(grep -E "Backup Timestamp[[:space:]]*=" 
"${latest_gpbackup_log}" | grep -Eo "[[:digit:]]{14}" | head -n 1)
+                  fi
+                fi
+                if [ -z "${timestamp}" ]; then
+                  echo "Could not parse backup timestamp from gpbackup logs"
+                  echo "----- ${backup_log} -----"
+                  cat "${backup_log}" || true
+                  latest_gpbackup_log=$(ls -1t 
"${HOME}/gpAdminLogs"/gpbackup_*.log 2>/dev/null | head -n 1 || true)
+                  if [ -n "${latest_gpbackup_log}" ]; then
+                    echo "----- ${latest_gpbackup_log} -----"
+                    cat "${latest_gpbackup_log}" || true
+                  fi
+                  exit 1
+                fi
+
+                dropdb "${test_db}"
+                gprestore --timestamp "${timestamp}" --plugin-config 
/tmp/minio_config.yaml --create-db > "${restore_log}" 2>&1
+
+                result=$(psql -X -d "${test_db}" -tc "SELECT count(*) FROM 
pg_class WHERE relname='test1'" | xargs)
+                if [ "${result}" != "1" ]; then
+                  echo "Expected table test1 to exist after restore, got 
count=${result}"
+                  exit 1
+                fi
+                
+                # Cleanup
+                kill ${minio_pid} || true
+                wait ${minio_pid} 2>/dev/null || true
+                rm -rf /tmp/minio-data || true
+                exit 0
+                ;;
+              regression)
+                pushd ${CLOUDBERRY_SRC}/src/test/regress
+                ./pg_regress --dbname=regression --host=localhost --port=7000 
--init-file=init_file --schedule=./minimal_schedule || true
+                cat regression.diffs 2>/dev/null || true
+                popd
+
+                psql -d postgres -c 'DROP TABLESPACE IF EXISTS test_tablespace'
+
+                pg_dump regression -f /tmp/regression_schema_before.sql 
--schema-only
+
+                backup_dir=/tmp/regression_backup
+                rm -rf "${backup_dir}"
+                mkdir -p "${backup_dir}"
+
+                # Run gpbackup and capture output to extract timestamp
+                backup_log="${TEST_LOG_ROOT}/gpbackup_output.log"
+                gpbackup --dbname regression --backup-dir "${backup_dir}" 
--metadata-only 2>&1 | tee "${backup_log}"
+
+                # Extract timestamp from backup command output (most reliable)
+                timestamp=$(grep -E "Backup Timestamp[[:space:]]*=" 
"${backup_log}" | grep -Eo "[[:digit:]]{14}" | head -1)
+
+                # Fallback: Check gpAdminLogs if not found in direct output
+                if [ -z "${timestamp}" ]; then
+                  latest_gpbackup_log=$(ls -1t 
"${HOME}/gpAdminLogs"/gpbackup_*.log 2>/dev/null | head -1 || true)
+                  if [ -n "${latest_gpbackup_log}" ]; then
+                    timestamp=$(grep -E "Backup Timestamp[[:space:]]*=" 
"${latest_gpbackup_log}" | grep -Eo "[[:digit:]]{14}" | head -1)
+                  fi
+                fi
+
+                # Check if timestamp is empty
+                if [ -z "${timestamp}" ]; then
+                  echo "ERROR: Could not parse backup timestamp from gpbackup 
logs"
+                  echo "=== Final backup directory structure ==="
+                  find "${backup_dir}" -type f | sort
+                  exit 1
+                fi
+
+                echo "backup timestamp: ${timestamp}"
+
+                psql -d postgres -c 'DROP DATABASE IF EXISTS regression'
+
+                set +e
+                  gprestore --create-db --timestamp ${timestamp} --backup-dir 
"${backup_dir}" --with-globals --on-error-continue
+                set -e
+
+                pg_dump regression -f /tmp/regression_schema_after.sql 
--schema-only
+
+                set +e
+                  diff -u /tmp/regression_schema_before.sql 
/tmp/regression_schema_after.sql > /tmp/regression_schema.diff
+                  diff_status=$?
+                set -e
+
+                cp -a /tmp/regression_schema_before.sql 
"${TEST_LOG_ROOT}/regression_schema_before.sql" || true
+                cp -a /tmp/regression_schema_after.sql 
"${TEST_LOG_ROOT}/regression_schema_after.sql" || true
+                cp -a /tmp/regression_schema.diff 
"${TEST_LOG_ROOT}/regression_schema.diff" || true
+                ;;
+              scale)
+                export BACKUP_DIR=/tmp/scale_backup
+                export LOG_DIR="${TEST_LOG_ROOT}/scale"
+                mkdir -p "${LOG_DIR}"
+                chmod +x 
"${CLOUDBERRY_BACKUP_SRC}/.github/workflows/scale-tests-cloudberry-ci.bash"
+                
"${CLOUDBERRY_BACKUP_SRC}/.github/workflows/scale-tests-cloudberry-ci.bash" 
2>&1 | tee "${TEST_LOG_ROOT}/cloudberry-backup-scale.log"
+                ;;
+              *)
+                echo "unknown test target: ${TEST_TARGET}"
+                exit 2
+                ;;
+            esac
+            test_status=${PIPESTATUS[0]}
+            set -e
+          popd
+
+          if [ -n "${MASTER_DATA_DIRECTORY:-}" ] && [ -d 
"${MASTER_DATA_DIRECTORY}/log" ]; then
+            cp -a "${MASTER_DATA_DIRECTORY}/log" "${TEST_LOG_ROOT}/gpdb-log" 
|| true
+          fi
+          if [ -d "${CLOUDBERRY_SRC}/build-logs" ]; then
+            cp -a "${CLOUDBERRY_SRC}/build-logs" 
"${TEST_LOG_ROOT}/cloudberry-build-logs" || true
+          fi
+
+          exit ${test_status}
+          SCRIPT
+
+          chmod +x /tmp/run_cloudberry_backup_tests.sh
+          set +e
+          su - gpadmin -c "TEST_LOG_ROOT=${TEST_LOG_ROOT} 
CLOUDBERRY_BACKUP_SRC=${CLOUDBERRY_BACKUP_SRC} CLOUDBERRY_SRC=${CLOUDBERRY_SRC} 
TEST_TARGET=${TEST_TARGET} /tmp/run_cloudberry_backup_tests.sh"
+          status=$?
+          set -e
+
+          {
+            echo "## Cloudberry-backup Test Summary"
+            echo "- Target: ${TEST_TARGET}"
+            if [ ${status} -eq 0 ]; then
+              echo "- Result: PASS"
+            else
+              echo "- Result: FAIL"
+            fi
+            echo "- Logs: ${TEST_LOG_ROOT}"
+          } >> "$GITHUB_STEP_SUMMARY"
+
+          exit ${status}
+
+      - name: Upload Test Logs (On Failure)
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: cloudberry-backup-logs-${{ matrix.test_target }}
+          path: test-logs/${{ matrix.test_target }}
+          if-no-files-found: warn
+          retention-days: 7
diff --git a/.github/workflows/scale-tests-cloudberry-ci.bash 
b/.github/workflows/scale-tests-cloudberry-ci.bash
new file mode 100644
index 00000000..75ed2f7c
--- /dev/null
+++ b/.github/workflows/scale-tests-cloudberry-ci.bash
@@ -0,0 +1,175 @@
+#!/bin/bash
+
+# 
------------------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# 
------------------------------------------------------------------------------
+# Non-perf scale tests for GitHub Actions Cloudberry demo cluster.
+# This focuses on backup/restore correctness under moderate object/data scale.
+# 
------------------------------------------------------------------------------
+
+set -euo pipefail
+
+BACKUP_DIR="${BACKUP_DIR:-/tmp/scale_backup}"
+LOG_DIR="${LOG_DIR:-/tmp/scale-test-logs}"
+
+mkdir -p "${BACKUP_DIR}" "${LOG_DIR}"
+
+extract_timestamp() {
+  local log_file="$1"
+  local ts
+  ts="$(grep -E "Backup Timestamp[[:space:]]*=" "${log_file}" | grep -Eo 
"[[:digit:]]{14}" | head -n 1 || true)"
+  if [ -z "${ts}" ]; then
+    local latest_gpbackup_log
+    latest_gpbackup_log="$(ls -1t "${HOME}/gpAdminLogs"/gpbackup_*.log 
2>/dev/null | head -n 1 || true)"
+    if [ -n "${latest_gpbackup_log}" ]; then
+      ts="$(grep -E "Backup Timestamp[[:space:]]*=" "${latest_gpbackup_log}" | 
grep -Eo "[[:digit:]]{14}" | head -n 1 || true)"
+    fi
+  fi
+  if [ -z "${ts}" ]; then
+    echo "Could not parse backup timestamp from ${log_file}"
+    return 1
+  fi
+  echo "${ts}"
+}
+
+validate_datascaledb_restore() {
+  local restore_db="$1"
+  local src_tables
+  local dst_tables
+  local src_big
+  local dst_big
+
+  src_tables="$(psql -X -d datascaledb -Atc "SELECT count(*) FROM pg_class c 
JOIN pg_namespace n ON n.oid=c.relnamespace WHERE n.nspname='public' AND 
c.relkind='r'")"
+  dst_tables="$(psql -X -d "${restore_db}" -Atc "SELECT count(*) FROM pg_class 
c JOIN pg_namespace n ON n.oid=c.relnamespace WHERE n.nspname='public' AND 
c.relkind='r'")"
+  src_big="$(psql -X -d datascaledb -Atc "SELECT count(*) FROM tbl_big")"
+  dst_big="$(psql -X -d "${restore_db}" -Atc "SELECT count(*) FROM tbl_big")"
+
+  if [ "${src_tables}" != "${dst_tables}" ] || [ "${src_big}" != "${dst_big}" 
]; then
+    echo "Data scale restore validation failed for ${restore_db}"
+    echo "source tables=${src_tables}, restored tables=${dst_tables}"
+    echo "source tbl_big=${src_big}, restored tbl_big=${dst_big}"
+    return 1
+  fi
+}
+
+echo "## Preparing copy queue scale database ##"
+psql -X -d postgres -qc "DROP DATABASE IF EXISTS copyqueuedb"
+createdb copyqueuedb
+for j in $(seq 1 300); do
+  psql -X -d copyqueuedb -q -c "CREATE TABLE tbl_1k_${j}(i int) DISTRIBUTED BY 
(i);"
+  psql -X -d copyqueuedb -q -c "INSERT INTO tbl_1k_${j} SELECT 
generate_series(1,1000)"
+done
+
+echo "## Copy queue backup/restore matrix ##"
+for q in 2 4 8; do
+  b_log="${LOG_DIR}/copyqueue_backup_q${q}.log"
+  echo "Running gpbackup copy queue size ${q}"
+  gpbackup --dbname copyqueuedb --backup-dir "${BACKUP_DIR}" 
--single-data-file --no-compression --copy-queue-size "${q}" \
+    2>&1 | tee "${b_log}"
+  timestamp="$(extract_timestamp "${b_log}")"
+  restore_db="copyqueue_restore_q${q}"
+  psql -X -d postgres -qc "DROP DATABASE IF EXISTS ${restore_db}"
+  gprestore --timestamp "${timestamp}" --backup-dir "${BACKUP_DIR}" 
--create-db --redirect-db "${restore_db}" --copy-queue-size "${q}" \
+    2>&1 | tee "${LOG_DIR}/copyqueue_restore_q${q}.log"
+  src_tbl_count="$(psql -X -d copyqueuedb -Atc "SELECT count(*) FROM pg_class 
c JOIN pg_namespace n ON n.oid=c.relnamespace WHERE n.nspname='public' AND 
c.relkind='r'")"
+  dst_tbl_count="$(psql -X -d "${restore_db}" -Atc "SELECT count(*) FROM 
pg_class c JOIN pg_namespace n ON n.oid=c.relnamespace WHERE n.nspname='public' 
AND c.relkind='r'")"
+  if [ "${src_tbl_count}" != "${dst_tbl_count}" ]; then
+    echo "Copy queue restore validation failed for ${restore_db}: source 
tables=${src_tbl_count}, restored tables=${dst_tbl_count}"
+    exit 1
+  fi
+done
+
+echo "## Preparing data scale database ##"
+psql -X -d postgres -qc "DROP DATABASE IF EXISTS datascaledb"
+createdb datascaledb
+for j in $(seq 1 200); do
+  psql -X -d datascaledb -q -c "CREATE TABLE tbl_1k_${j}(i int) DISTRIBUTED BY 
(i);"
+  psql -X -d datascaledb -q -c "INSERT INTO tbl_1k_${j} SELECT 
generate_series(1,1000)"
+done
+
+psql -X -d datascaledb -q -c "CREATE TABLE tbl_big(i int) DISTRIBUTED BY (i);"
+for j in $(seq 1 25); do
+  psql -X -d datascaledb -q -c "INSERT INTO tbl_big SELECT 
generate_series(1,100000)"
+done
+
+psql -X -d datascaledb -q -c "CREATE TABLE big_partition(a int, b int, c int) 
DISTRIBUTED BY (a) PARTITION BY RANGE (b) (START (1) END (101) EVERY (1))"
+psql -X -d datascaledb -q -c "INSERT INTO big_partition SELECT i, i, i FROM 
generate_series(1,100) i"
+for j in $(seq 1 8); do
+  psql -X -d datascaledb -q -c "INSERT INTO big_partition SELECT * FROM 
big_partition"
+done
+
+echo "## Running data scale backup/restore matrix ##"
+run_data_scale_case() {
+  local case_name="$1"
+  local backup_flags="$2"
+  local restore_db="$3"
+  local jobs="$4"
+  local b_log="${LOG_DIR}/datascale_${case_name}_backup.log"
+  local r_log="${LOG_DIR}/datascale_${case_name}_restore.log"
+
+  gpbackup --dbname datascaledb --backup-dir "${BACKUP_DIR}" ${backup_flags} 
2>&1 | tee "${b_log}"
+  local ts
+  ts="$(extract_timestamp "${b_log}")"
+  psql -X -d postgres -qc "DROP DATABASE IF EXISTS ${restore_db}"
+  gprestore --timestamp "${ts}" --backup-dir "${BACKUP_DIR}" --create-db 
--redirect-db "${restore_db}" --jobs "${jobs}" \
+    2>&1 | tee "${r_log}"
+  validate_datascaledb_restore "${restore_db}"
+}
+
+run_data_scale_case "multi_data_file" "--leaf-partition-data" 
"datascale_restore_multi" "4"
+run_data_scale_case "multi_data_file_zstd" "--leaf-partition-data 
--compression-type zstd" "datascale_restore_multi_zstd" "4"
+run_data_scale_case "single_data_file" "--leaf-partition-data 
--single-data-file" "datascale_restore_single" "1"
+run_data_scale_case "single_data_file_zstd" "--leaf-partition-data 
--single-data-file --compression-type zstd" "datascale_restore_single_zstd" "1"
+
+echo "## Preparing metadata scale database ##"
+psql -X -d postgres -qc "DROP DATABASE IF EXISTS metadatascaledb"
+createdb metadatascaledb
+
+psql -X -d metadatascaledb <<'SQL'
+DO $$
+DECLARE
+  i int;
+BEGIN
+  FOR i IN 1..80 LOOP
+    EXECUTE format('CREATE SCHEMA IF NOT EXISTS s_%s', i);
+    EXECUTE format('CREATE TABLE s_%s.t_%s(id int, val text) DISTRIBUTED BY 
(id)', i, i);
+    EXECUTE format('CREATE VIEW s_%s.v_%s AS SELECT * FROM s_%s.t_%s', i, i, 
i, i);
+  END LOOP;
+END$$;
+SQL
+
+echo "## Running metadata-only backup/restore ##"
+meta_backup_log="${LOG_DIR}/metadata_backup.log"
+meta_restore_log="${LOG_DIR}/metadata_restore.log"
+gpbackup --dbname metadatascaledb --backup-dir "${BACKUP_DIR}" --metadata-only 
--verbose 2>&1 | tee "${meta_backup_log}"
+meta_ts="$(extract_timestamp "${meta_backup_log}")"
+psql -X -d postgres -qc "DROP DATABASE IF EXISTS metadatascaledb_res"
+gprestore --timestamp "${meta_ts}" --backup-dir "${BACKUP_DIR}" --redirect-db 
metadatascaledb_res --jobs 4 --create-db \
+  2>&1 | tee "${meta_restore_log}"
+
+echo "## Minimal correctness checks ##"
+src_schema_count="$(psql -X -d metadatascaledb -Atc "SELECT count(*) FROM 
pg_namespace WHERE nspname LIKE 's_%'")"
+dst_schema_count="$(psql -X -d metadatascaledb_res -Atc "SELECT count(*) FROM 
pg_namespace WHERE nspname LIKE 's_%'")"
+if [ "${src_schema_count}" != "${dst_schema_count}" ]; then
+  echo "Metadata restore schema count mismatch: src=${src_schema_count} 
dst=${dst_schema_count}"
+  exit 1
+fi
+
+echo "Scale tests completed successfully"
diff --git a/gpbackup_s3_plugin.go b/gpbackup_s3_plugin.go
index b699e715..c511dc1b 100644
--- a/gpbackup_s3_plugin.go
+++ b/gpbackup_s3_plugin.go
@@ -19,7 +19,7 @@ func main() {
                Name:  "version",
                Usage: "print version of gpbackup_s3_plugin",
        }
-       app.Version = s3plugin.Version
+       app.Version = s3plugin.GetVersion()
        app.Usage = ""
        app.UsageText = "Not supported as a standalone utility. " +
                "This plugin must be used in conjunction with gpbackup and 
gprestore."
diff --git a/plugins/generate_minio_config.sh b/plugins/generate_minio_config.sh
index b776e9ab..33b89f4e 100755
--- a/plugins/generate_minio_config.sh
+++ b/plugins/generate_minio_config.sh
@@ -6,7 +6,7 @@ options:
   endpoint: http://localhost:9000/
   aws_access_key_id: minioadmin
   aws_secret_access_key: minioadmin
-  bucket: gpbackup-s3-test
+  bucket: cloudberry-backup-s3-test
   folder: test/backup
   backup_max_concurrent_requests: 2
   backup_multipart_chunksize: 5MB
diff --git a/plugins/plugin_test.sh b/plugins/plugin_test.sh
index c793bcf0..9c1bf46c 100755
--- a/plugins/plugin_test.sh
+++ b/plugins/plugin_test.sh
@@ -40,8 +40,8 @@ testdatalarge="$testdir/testdatalarge_$time_second.txt"
 logdir="/tmp/test_bench_logs"
 
 text="this is some text"
-data=`LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom | head -c 1000 ; echo`
-data_large=`LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom | head -c 1000000 ; echo`
+data=`LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom 2>/dev/null | head -c 1000 ; 
echo`
+data_large=`LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom 2>/dev/null | head -c 
1000000 ; echo`
 mkdir -p $testdir
 mkdir -p $logdir
 echo $text > $testfile
diff --git a/plugins/s3plugin/s3plugin.go b/plugins/s3plugin/s3plugin.go
index 9d9f42f7..92f0e080 100644
--- a/plugins/s3plugin/s3plugin.go
+++ b/plugins/s3plugin/s3plugin.go
@@ -28,7 +28,15 @@ import (
        "gopkg.in/yaml.v2"
 )
 
-var Version string
+// version is set at build time via ldflags:
+//
+//     go build -ldflags "-X 
github.com/apache/cloudberry-backup/plugins/s3plugin.version=..."
+var version string
+
+// GetVersion returns the build version string injected via ldflags.
+func GetVersion() string {
+       return version
+}
 
 const apiVersion = "0.5.0"
 const Mebibyte = 1024 * 1024
diff --git a/utils/agent_remote.go b/utils/agent_remote.go
index 39a019ab..aab33f78 100644
--- a/utils/agent_remote.go
+++ b/utils/agent_remote.go
@@ -155,6 +155,7 @@ func StartGpbackupHelpers(c *cluster.Cluster, fpInfo 
filepath.FilePathInfo, oper
        defer helperMutex.Unlock()
 
        gphomePath := operating.System.Getenv("GPHOME")
+       envSourceCommand := SourceClusterEnvCommand(gphomePath)
        pluginStr := ""
        if pluginConfigFile != "" {
                _, configFilename := path.Split(pluginConfigFile)
@@ -188,12 +189,12 @@ func StartGpbackupHelpers(c *cluster.Cluster, fpInfo 
filepath.FilePathInfo, oper
                // we run these commands in sequence to ensure that any failure 
is critical; the last command ensures the agent process was successfully started
                return fmt.Sprintf(`cat << HEREDOC > %[1]s && chmod +x %[1]s && 
( nohup %[1]s &> /dev/null &)
 #!/bin/bash
-source %[2]s/greenplum_path.sh
-%[2]s/bin/%s
+%[3]s
+%[2]s/bin/%[4]s
 
 HEREDOC
 
-`, scriptFile, gphomePath, helperCmdStr)
+`, scriptFile, gphomePath, envSourceCommand, helperCmdStr)
        })
        c.CheckClusterError(remoteOutput, "Error starting gpbackup_helper 
agent", func(contentID int) string {
                return "Error starting gpbackup_helper agent"
diff --git a/utils/plugin.go b/utils/plugin.go
index d6f3259a..212ee7db 100644
--- a/utils/plugin.go
+++ b/utils/plugin.go
@@ -103,8 +103,8 @@ func (plugin *PluginConfig) CheckPluginExistsOnAllHosts(c 
*cluster.Cluster) stri
 }
 
 func (plugin *PluginConfig) checkPluginAPIVersion(c *cluster.Cluster) {
-       command := fmt.Sprintf("source %s/greenplum_path.sh && %s 
plugin_api_version",
-               operating.System.Getenv("GPHOME"), plugin.ExecutablePath)
+       command := fmt.Sprintf("%s && %s plugin_api_version",
+               SourceClusterEnvCommand(operating.System.Getenv("GPHOME")), 
plugin.ExecutablePath)
        remoteOutput := c.GenerateAndExecuteCommand(
                "Checking plugin api version on all hosts",
                cluster.ON_HOSTS&cluster.INCLUDE_COORDINATOR,
@@ -159,8 +159,8 @@ func (plugin *PluginConfig) checkPluginAPIVersion(c 
*cluster.Cluster) {
 }
 
 func (plugin *PluginConfig) getPluginNativeVersion(c *cluster.Cluster) string {
-       command := fmt.Sprintf("source %s/greenplum_path.sh && %s --version",
-               operating.System.Getenv("GPHOME"), plugin.ExecutablePath)
+       command := fmt.Sprintf("%s && %s --version",
+               SourceClusterEnvCommand(operating.System.Getenv("GPHOME")), 
plugin.ExecutablePath)
        remoteOutput := c.GenerateAndExecuteCommand(
                "Checking plugin version on all hosts",
                cluster.ON_HOSTS|cluster.INCLUDE_COORDINATOR,
@@ -284,8 +284,8 @@ func (plugin *PluginConfig) buildHookString(command string,
        }
 
        backupDir := fpInfo.GetDirForContent(contentID)
-       return fmt.Sprintf("source %s/greenplum_path.sh && %s %s %s %s %s %s",
-               operating.System.Getenv("GPHOME"), plugin.ExecutablePath, 
command,
+       return fmt.Sprintf("%s && %s %s %s %s %s %s",
+               SourceClusterEnvCommand(operating.System.Getenv("GPHOME")), 
plugin.ExecutablePath, command,
                plugin.ConfigPath, backupDir, scope, contentIDStr)
 }
 
@@ -426,8 +426,8 @@ func (plugin *PluginConfig) BackupSegmentTOCs(c 
*cluster.Cluster, fpInfo filepat
        remoteOutput = c.GenerateAndExecuteCommand("Processing segment TOC 
files with plugin", cluster.ON_SEGMENTS,
                func(contentID int) string {
                        tocFile := fpInfo.GetSegmentTOCFilePath(contentID)
-                       return fmt.Sprintf("source %s/greenplum_path.sh && %s 
backup_file %s %s && "+
-                               "chmod 0755 %s", 
operating.System.Getenv("GPHOME"), plugin.ExecutablePath, plugin.ConfigPath, 
tocFile, tocFile)
+                       return fmt.Sprintf("%s && %s backup_file %s %s && "+
+                               "chmod 0755 %s", 
SourceClusterEnvCommand(operating.System.Getenv("GPHOME")), 
plugin.ExecutablePath, plugin.ConfigPath, tocFile, tocFile)
                })
        c.CheckClusterError(remoteOutput, "Unable to process segment TOC files 
using plugin", func(contentID int) string {
                return "See gpAdminLog for gpbackup_helper on segment host for 
details: Error occurred with plugin"
@@ -445,8 +445,8 @@ func (plugin *PluginConfig) RestoreSegmentTOCs(c 
*cluster.Cluster, fpInfo filepa
                        tocFile := fpInfo.GetSegmentTOCFilePath(contentID)
                        // Restore the filename with the origin content to the 
directory with the destination content
                        tocFile = strings.ReplaceAll(tocFile, 
fmt.Sprintf("gpbackup_%d", contentID), fmt.Sprintf("gpbackup_%d", origContent))
-                       command = fmt.Sprintf("mkdir -p %s && source 
%s/greenplum_path.sh && %s restore_file %s %s",
-                               fpInfo.GetDirForContent(contentID), 
operating.System.Getenv("GPHOME"),
+                       command = fmt.Sprintf("mkdir -p %s && %s && %s 
restore_file %s %s",
+                               fpInfo.GetDirForContent(contentID), 
SourceClusterEnvCommand(operating.System.Getenv("GPHOME")),
                                plugin.ExecutablePath, plugin.ConfigPath, 
tocFile)
                        return command
                })
diff --git a/utils/util.go b/utils/util.go
index f13b1c35..c2450c53 100644
--- a/utils/util.go
+++ b/utils/util.go
@@ -35,6 +35,25 @@ func CommandExists(cmd string) bool {
        return err == nil
 }
 
+func ClusterEnvScriptPath(gphome string) string {
+       greenplumPath := path.Join(gphome, "greenplum_path.sh")
+       if FileExists(greenplumPath) {
+               return greenplumPath
+       }
+
+       cloudberryPath := path.Join(gphome, "cloudberry-env.sh")
+       if FileExists(cloudberryPath) {
+               return cloudberryPath
+       }
+
+       // Preserve previous behavior as fallback for clearer error messages 
upstream.
+       return greenplumPath
+}
+
+func SourceClusterEnvCommand(gphome string) string {
+       return fmt.Sprintf("source %s", ClusterEnvScriptPath(gphome))
+}
+
 func FileExists(filename string) bool {
        _, err := os.Stat(filename)
        return err == nil


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to