(airflow-site-archive) 01/01: Add consistency changes for the syncing scripts and workflows

2025-05-15 Thread potiuk
This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch consistency-changes-for-syncing-scripts
in repository https://gitbox.apache.org/repos/asf/airflow-site-archive.git

commit 8ece730d7643a7d0c6ffbadfbd28a1b4d1c2938c
Author: Jarek Potiuk 
AuthorDate: Thu May 15 22:44:25 2025 -0400

Add consistency changes for the syncing scripts and workflows

* folders -> packages (which is consistent with breeze naming)
* more logical parameters for GH -> S3 sync (full sync boolean,
  better sequence of parameters)
* commit-sha -> commit-ref (because it might not be only a SHA)
* consistent plural in document_packages
* updated README with added information about workflows and
  --help produced by running --help
---
 .github/workflows/github-to-s3.yml | 116 +++--
 .github/workflows/s3-to-github.yml |  54 -
 README.md  |  78 -
 scripts/github_to_s3.py|  83 +-
 scripts/s3_to_github.py|  36 ++--
 scripts/transfer_utils.py  |  42 +++---
 6 files changed, 220 insertions(+), 189 deletions(-)

diff --git a/.github/workflows/github-to-s3.yml 
b/.github/workflows/github-to-s3.yml
index e02de60a30..174b99072c 100644
--- a/.github/workflows/github-to-s3.yml
+++ b/.github/workflows/github-to-s3.yml
@@ -28,24 +28,21 @@ on: # yamllint disable-line rule:truthy
   - live
   - staging
 default: live
-  sync-type:
-description: "Perform a full sync or just sync the last commit"
+  document-packages:
+description: "Packages (long or short) separated with spaces"
 required: false
-default: "single_commit"
-type: choice
-options:
-  - single_commit
-  - full_sync
-  commit-sha:
-description: "For single-commit - SHA/tag/branch (default: latest in 
selected branch)"
-required: false
-default: ""
+default: "all"
 type: string
-  document-folders:
-description: "For full-sync, you can select which packages to upload - 
space separated"
+  commit-reference:
+description: "Commit used to sync"
 required: false
-default: "all"
+default: "main"
 type: string
+  full-sync:
+description: "If specified, whole repo will be synced (not only single 
commit)."
+required: false
+default: false
+type: boolean
   processes:
 description: "Number of processes to use for syncing"
 required: false
@@ -59,24 +56,31 @@ jobs:
   - name: Summarize parameters
 id: parameters
 env:
-  DOCUMENT_FOLDERS: ${{ inputs.document-folders }}
-  SYNC_TYPE: ${{ inputs.sync-type }}
-  COMMIT_SHA: ${{ inputs.commit-sha }}
+  DOCUMENT_PACKAGES: ${{ inputs.document-packages }}
   PROCESSES: ${{ inputs.processes }}
-  DESTINATION: ${{ inputs.destination }}}
+  DESTINATION: ${{ inputs.destination }}
+  COMMIT_REFERENCE: ${{ inputs.commit-reference }}
+  FULL_SYNC: ${{ inputs.full-sync }}
 run: |
   echo "Input parameters summary"
   echo "="
-  echo "Document folders: ${DOCUMENT_FOLDER}"
-  echo "Sync type: ${SYNC_TYPE}"
-  echo "Commit SHA: ${COMMIT_SHA}"
-  echo "Processes: ${PROCESSES}"
   echo "Destination: ${DESTINATION}"
+  echo "Document packages: ${DOCUMENT_PACKAGES}"
+  echo "Full sync: ${FULL_SYNC}"
+  echo "Commit reference: ${COMMIT_REFERENCE}"
+  echo "Processes: ${PROCESSES}"
+  set -x
   if [[ "${DESTINATION}"  == "live" ]]; then
  echo 
"destination-location=s3://live-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   else
  echo 
"destination-location=s3://staging-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   fi
+  if [[ "${FULL_SYNC}" == "true" ]] ; then
+echo "sync-type=full-sync" >> ${GITHUB_OUTPUT}
+  else
+echo "sync-type=single-commit" >> ${GITHUB_OUTPUT}
+  fi
+  echo "commit-ref=${{ env.COMMIT_REFERENCE }}" >> ${GITHUB_OUTPUT}
 
   - uses: actions/checkout@v4
 # Checkout only workflow and scripts directory to run scripts from
@@ -121,53 +125,53 @@ jobs:
   sudo chown -R "${USER}" /mnt/cloned-airflow-site-archive
   ln -v -s /mnt/cloned-airflow-site-archive 
./cloned-airflow-site-archive
 
-  - name: Pre-process docs folders
+  - name: Pre-process docs packages
 env:
-  DOCUMENTS_FOLDERS: ${{ inputs.document-folders }}
-id: docs-folders-processed
+  DOCUMENTS_PACKAGES: ${{ inputs.document-packages }}
+id: docs-packages-processed
 run: |
   echo "s

(airflow-site-archive) 01/01: Add consistency changes for the syncing scripts and workflows

2025-05-15 Thread potiuk
This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch consistency-changes-for-syncing-scripts
in repository https://gitbox.apache.org/repos/asf/airflow-site-archive.git

commit 52b2711fe71c5e00b487b7848c3e472904c55340
Author: Jarek Potiuk 
AuthorDate: Thu May 15 22:44:25 2025 -0400

Add consistency changes for the syncing scripts and workflows

* folders -> packages (which is consistent with breeze naming)
* more logical parameters for GH -> S3 sync (full sync boolean,
  better sequence of parameters)
* commit-sha -> commit-ref (because it might not be only a SHA)
* consistent plural in document_packages
* updated README with added information about workflows and
  --help produced by running --help
---
 .github/workflows/github-to-s3.yml | 116 +++--
 .github/workflows/s3-to-github.yml |  54 -
 README.md  |  78 -
 scripts/github_to_s3.py|  83 +-
 scripts/s3_to_github.py|  36 ++--
 scripts/transfer_utils.py  |  42 +++---
 6 files changed, 220 insertions(+), 189 deletions(-)

diff --git a/.github/workflows/github-to-s3.yml 
b/.github/workflows/github-to-s3.yml
index e02de60a30..e3b712cb1e 100644
--- a/.github/workflows/github-to-s3.yml
+++ b/.github/workflows/github-to-s3.yml
@@ -28,23 +28,20 @@ on: # yamllint disable-line rule:truthy
   - live
   - staging
 default: live
-  sync-type:
-description: "Perform a full sync or just sync the last commit"
+  document-packages:
+description: "Packages (long or short) separated with spaces"
 required: false
-default: "single_commit"
-type: choice
-options:
-  - single_commit
-  - full_sync
-  commit-sha:
-description: "For single-commit - SHA/tag/branch (default: latest in 
selected branch)"
-required: false
-default: ""
+default: "all"
 type: string
-  document-folders:
-description: "For full-sync, you can select which packages to upload - 
space separated"
+  full-sync:
+description: "If specified, whole repo will be synced (not only single 
commit)."
 required: false
-default: "all"
+default: false
+type: boolean
+  commit-reference:
+description: "Commit used to sync if np full-sync (default HEAD of 
branch chosen)."
+required: false
+default: "HEAD"
 type: string
   processes:
 description: "Number of processes to use for syncing"
@@ -59,24 +56,31 @@ jobs:
   - name: Summarize parameters
 id: parameters
 env:
-  DOCUMENT_FOLDERS: ${{ inputs.document-folders }}
-  SYNC_TYPE: ${{ inputs.sync-type }}
-  COMMIT_SHA: ${{ inputs.commit-sha }}
+  DOCUMENT_PACKAGES: ${{ inputs.document-packages }}
   PROCESSES: ${{ inputs.processes }}
-  DESTINATION: ${{ inputs.destination }}}
+  DESTINATION: ${{ inputs.destination }}
+  COMMIT_REFERENCE: ${{ inputs.commit-reference }}
+  FULL_SYNC: ${{ inputs.full-sync }}
 run: |
   echo "Input parameters summary"
   echo "="
-  echo "Document folders: ${DOCUMENT_FOLDER}"
-  echo "Sync type: ${SYNC_TYPE}"
-  echo "Commit SHA: ${COMMIT_SHA}"
-  echo "Processes: ${PROCESSES}"
   echo "Destination: ${DESTINATION}"
+  echo "Document packages: ${DOCUMENT_PACKAGES}"
+  echo "Full sync: ${FULL_SYNC}"
+  echo "Commit reference: ${COMMIT_REFERENCE}"
+  echo "Processes: ${PROCESSES}"
+  set -x
   if [[ "${DESTINATION}"  == "live" ]]; then
  echo 
"destination-location=s3://live-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   else
  echo 
"destination-location=s3://staging-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   fi
+  if [[ "${FULL_SYNC}" == "true" ]] ; then
+echo "sync-type=full-sync" >> ${GITHUB_OUTPUT}
+  else
+echo "sync-type=single-commit" >> ${GITHUB_OUTPUT}
+  fi
+  echo "commit-ref=${{ inputs.full-sync && '' || env.COMMIT_REFERENCE 
}}" >> ${GITHUB_OUTPUT}
 
   - uses: actions/checkout@v4
 # Checkout only workflow and scripts directory to run scripts from
@@ -121,53 +125,53 @@ jobs:
   sudo chown -R "${USER}" /mnt/cloned-airflow-site-archive
   ln -v -s /mnt/cloned-airflow-site-archive 
./cloned-airflow-site-archive
 
-  - name: Pre-process docs folders
+  - name: Pre-process docs packages
 env:
-  DOCUMENTS_FOLDERS: ${{ inputs.document-folders }}
-id: docs-folders-processed
+  DOCUMENTS_PACKAGES: ${{ inputs.document-packages }}
+id: docs-p

(airflow-site-archive) 01/01: Add consistency changes for the syncing scripts and workflows

2025-05-15 Thread potiuk
This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch consistency-changes-for-syncing-scripts
in repository https://gitbox.apache.org/repos/asf/airflow-site-archive.git

commit 385d74d755717a43a3d18704ce9b29ae620265cd
Author: Jarek Potiuk 
AuthorDate: Thu May 15 22:44:25 2025 -0400

Add consistency changes for the syncing scripts and workflows

* folders -> packages (which is consistent with breeze naming)
* more logical parameters for GH -> S3 sync (full sync boolean,
  better sequence of parameters)
* commit-sha -> commit-ref (because it might not be only a SHA)
* consistent plural in document_packages
* updated README with added information about workflows and
  --help produced by running --help
---
 .github/workflows/github-to-s3.yml | 116 +++--
 .github/workflows/s3-to-github.yml |  54 -
 README.md  |  78 -
 scripts/github_to_s3.py|  83 +-
 scripts/s3_to_github.py|  36 ++--
 scripts/transfer_utils.py  |  42 +++---
 6 files changed, 220 insertions(+), 189 deletions(-)

diff --git a/.github/workflows/github-to-s3.yml 
b/.github/workflows/github-to-s3.yml
index e02de60a30..a764c2047c 100644
--- a/.github/workflows/github-to-s3.yml
+++ b/.github/workflows/github-to-s3.yml
@@ -28,23 +28,20 @@ on: # yamllint disable-line rule:truthy
   - live
   - staging
 default: live
-  sync-type:
-description: "Perform a full sync or just sync the last commit"
+  document-packages:
+description: "Packages (long or short) separated with spaces"
 required: false
-default: "single_commit"
-type: choice
-options:
-  - single_commit
-  - full_sync
-  commit-sha:
-description: "For single-commit - SHA/tag/branch (default: latest in 
selected branch)"
-required: false
-default: ""
+default: "all"
 type: string
-  document-folders:
-description: "For full-sync, you can select which packages to upload - 
space separated"
+  full-sync:
+description: "If specified, whole repo will be synced (not only single 
commit)."
 required: false
-default: "all"
+default: false
+type: boolean
+  commit-reference:
+description: "Commit used to sync if np full-sync (default HEAD of 
branch chosen)."
+required: false
+default: "HEAD"
 type: string
   processes:
 description: "Number of processes to use for syncing"
@@ -59,24 +56,31 @@ jobs:
   - name: Summarize parameters
 id: parameters
 env:
-  DOCUMENT_FOLDERS: ${{ inputs.document-folders }}
-  SYNC_TYPE: ${{ inputs.sync-type }}
-  COMMIT_SHA: ${{ inputs.commit-sha }}
+  DOCUMENT_PACKAGES: ${{ inputs.document-packages }}
   PROCESSES: ${{ inputs.processes }}
-  DESTINATION: ${{ inputs.destination }}}
+  DESTINATION: ${{ inputs.destination }}
+  COMMIT_REFERENCE: ${{ inputs.commit-reference }}
+  FULL_SYNC: ${{ inputs.full-sync }}
 run: |
   echo "Input parameters summary"
   echo "="
-  echo "Document folders: ${DOCUMENT_FOLDER}"
-  echo "Sync type: ${SYNC_TYPE}"
-  echo "Commit SHA: ${COMMIT_SHA}"
-  echo "Processes: ${PROCESSES}"
   echo "Destination: ${DESTINATION}"
+  echo "Document packages: ${DOCUMENT_PACKAGES}"
+  echo "Full sync: ${FULL_SYNC}"
+  echo "Commit reference: ${COMMIT_REFERENCE}"
+  echo "Processes: ${PROCESSES}"
+  set -x
   if [[ "${DESTINATION}"  == "live" ]]; then
  echo 
"destination-location=s3://live-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   else
  echo 
"destination-location=s3://staging-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   fi
+  if [[ "${FULL_SYNC}" == "true" ]] ; then
+echo "sync-type=full-sync" >> ${GITHUB_OUTPUT}
+  else
+echo "sync-type=single-commit" >> ${GITHUB_OUTPUT}
+  fi
+  echo "commit-ref=${{ inputs.full-sync && '' || env.COMMIT_REFERENCE 
}}" >> ${GITHUB_OUTPUT}
 
   - uses: actions/checkout@v4
 # Checkout only workflow and scripts directory to run scripts from
@@ -121,53 +125,53 @@ jobs:
   sudo chown -R "${USER}" /mnt/cloned-airflow-site-archive
   ln -v -s /mnt/cloned-airflow-site-archive 
./cloned-airflow-site-archive
 
-  - name: Pre-process docs folders
+  - name: Pre-process docs packages
 env:
-  DOCUMENTS_FOLDERS: ${{ inputs.document-folders }}
-id: docs-folders-processed
+  DOCUMENTS_PACKAGES: ${{ inputs.document-packages }}
+id: docs-p

(airflow-site-archive) 01/01: Add consistency changes for the syncing scripts and workflows

2025-05-15 Thread potiuk
This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch consistency-changes-for-syncing-scripts
in repository https://gitbox.apache.org/repos/asf/airflow-site-archive.git

commit 689cc408969aa0137dd950764615b4b38bab019d
Author: Jarek Potiuk 
AuthorDate: Thu May 15 22:44:25 2025 -0400

Add consistency changes for the syncing scripts and workflows

* folders -> packages (which is consistent with breeze naming)
* more logical parameters for GH -> S3 sync (full sync boolean,
  better sequence of parameters)
* commit-sha -> commit-ref (because it might not be only a SHA)
* consistent plural in document_packages
* updated README with added information about workflows and
  --help produced by running --help
---
 .github/workflows/github-to-s3.yml | 114 +++--
 .github/workflows/s3-to-github.yml |  54 +-
 README.md  |  78 -
 scripts/github_to_s3.py|  83 ++-
 scripts/s3_to_github.py|  36 ++--
 scripts/transfer_utils.py  |  42 +++---
 6 files changed, 219 insertions(+), 188 deletions(-)

diff --git a/.github/workflows/github-to-s3.yml 
b/.github/workflows/github-to-s3.yml
index e02de60a30..62bb3b65d8 100644
--- a/.github/workflows/github-to-s3.yml
+++ b/.github/workflows/github-to-s3.yml
@@ -28,23 +28,20 @@ on: # yamllint disable-line rule:truthy
   - live
   - staging
 default: live
-  sync-type:
-description: "Perform a full sync or just sync the last commit"
+  document-packages:
+description: "Packages (long or short) separated with spaces"
 required: false
-default: "single_commit"
-type: choice
-options:
-  - single_commit
-  - full_sync
-  commit-sha:
-description: "For single-commit - SHA/tag/branch (default: latest in 
selected branch)"
-required: false
-default: ""
+default: "all"
 type: string
-  document-folders:
-description: "For full-sync, you can select which packages to upload - 
space separated"
+  full-sync:
+description: "If specified, whole repo will be synced (not only single 
commit)."
 required: false
-default: "all"
+default: false
+type: boolean
+  commit-reference:
+description: "Commit used to sync if np full-sync (default HEAD of 
branch chosen)."
+required: false
+default: "HEAD"
 type: string
   processes:
 description: "Number of processes to use for syncing"
@@ -59,24 +56,31 @@ jobs:
   - name: Summarize parameters
 id: parameters
 env:
-  DOCUMENT_FOLDERS: ${{ inputs.document-folders }}
-  SYNC_TYPE: ${{ inputs.sync-type }}
-  COMMIT_SHA: ${{ inputs.commit-sha }}
+  DOCUMENT_PACKAGES: ${{ inputs.document-packages }}
   PROCESSES: ${{ inputs.processes }}
-  DESTINATION: ${{ inputs.destination }}}
+  DESTINATION: ${{ inputs.destination }}
+  COMMIT_REFERENCE: ${{ inputs.commit-reference }}
+  FULL_SYNC: ${{ inputs.full-sync }}
 run: |
   echo "Input parameters summary"
   echo "="
-  echo "Document folders: ${DOCUMENT_FOLDER}"
-  echo "Sync type: ${SYNC_TYPE}"
-  echo "Commit SHA: ${COMMIT_SHA}"
-  echo "Processes: ${PROCESSES}"
   echo "Destination: ${DESTINATION}"
+  echo "Document packages: ${DOCUMENT_PACKAGES}"
+  echo "Full sync: ${FULL_SYNC}"
+  echo "Commit reference: ${COMMIT_REFERENCE}"
+  echo "Processes: ${PROCESSES}"
+  set -x
   if [[ "${DESTINATION}"  == "live" ]]; then
  echo 
"destination-location=s3://live-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   else
  echo 
"destination-location=s3://staging-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   fi
+  if [[ "${FULL_SYNC}" == "true" ]] ; then
+echo "sync-type=full-sync" >> ${GITHUB_OUTPUT}
+  else
+echo "sync-type=single-commit" >> ${GITHUB_OUTPUT}
+  fi
+  echo "commit-ref=${{ inputs.full-sync && '' || env.COMMIT_REFERENCE 
}}" >> ${GITHUB_OUTPUT}
 
   - uses: actions/checkout@v4
 # Checkout only workflow and scripts directory to run scripts from
@@ -121,53 +125,53 @@ jobs:
   sudo chown -R "${USER}" /mnt/cloned-airflow-site-archive
   ln -v -s /mnt/cloned-airflow-site-archive 
./cloned-airflow-site-archive
 
-  - name: Pre-process docs folders
+  - name: Pre-process docs packages
 env:
-  DOCUMENTS_FOLDERS: ${{ inputs.document-folders }}
-id: docs-folders-processed
+  DOCUMENTS_PACKAGES: ${{ inputs.document-packages }}
+id: docs

(airflow-site-archive) 01/01: Add consistency changes for the syncing scripts and workflows

2025-05-15 Thread potiuk
This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch consistency-changes-for-syncing-scripts
in repository https://gitbox.apache.org/repos/asf/airflow-site-archive.git

commit 37d89023860dc280d0f181fdfa097f69e24f56d9
Author: Jarek Potiuk 
AuthorDate: Thu May 15 22:44:25 2025 -0400

Add consistency changes for the syncing scripts and workflows

* folders -> packages (which is consistent with breeze naming)
* more logical parameters for GH -> S3 sync (full sync boolean,
  better sequence of parameters)
* commit-sha -> commit-ref (because it might not be only a SHA)
* consistent plural in document_packages
* updated README with added information about workflows and
  --help produced by running --help
---
 .github/workflows/github-to-s3.yml | 116 +++--
 .github/workflows/s3-to-github.yml |  54 -
 README.md  |  78 -
 scripts/github_to_s3.py|  83 +-
 scripts/s3_to_github.py|  36 ++--
 scripts/transfer_utils.py  |  42 +++---
 6 files changed, 220 insertions(+), 189 deletions(-)

diff --git a/.github/workflows/github-to-s3.yml 
b/.github/workflows/github-to-s3.yml
index e02de60a30..1c0582b3c3 100644
--- a/.github/workflows/github-to-s3.yml
+++ b/.github/workflows/github-to-s3.yml
@@ -28,23 +28,20 @@ on: # yamllint disable-line rule:truthy
   - live
   - staging
 default: live
-  sync-type:
-description: "Perform a full sync or just sync the last commit"
+  document-packages:
+description: "Packages (long or short) separated with spaces"
 required: false
-default: "single_commit"
-type: choice
-options:
-  - single_commit
-  - full_sync
-  commit-sha:
-description: "For single-commit - SHA/tag/branch (default: latest in 
selected branch)"
-required: false
-default: ""
+default: "all"
 type: string
-  document-folders:
-description: "For full-sync, you can select which packages to upload - 
space separated"
+  full-sync:
+description: "If specified, whole repo will be synced (not only single 
commit)."
 required: false
-default: "all"
+default: false
+type: boolean
+  commit-reference:
+description: "Commit used to sync if np full-sync (default HEAD of 
branch chosen)."
+required: false
+default: "HEAD"
 type: string
   processes:
 description: "Number of processes to use for syncing"
@@ -59,24 +56,31 @@ jobs:
   - name: Summarize parameters
 id: parameters
 env:
-  DOCUMENT_FOLDERS: ${{ inputs.document-folders }}
-  SYNC_TYPE: ${{ inputs.sync-type }}
-  COMMIT_SHA: ${{ inputs.commit-sha }}
+  DOCUMENT_PACKAGES: ${{ inputs.document-packages }}
   PROCESSES: ${{ inputs.processes }}
-  DESTINATION: ${{ inputs.destination }}}
+  DESTINATION: ${{ inputs.destination }}
+  COMMIT_REFERENCE: ${{ inputs.commit-reference }}
+  FULL_SYNC: ${{ inputs.full-sync }}
 run: |
   echo "Input parameters summary"
   echo "="
-  echo "Document folders: ${DOCUMENT_FOLDER}"
-  echo "Sync type: ${SYNC_TYPE}"
-  echo "Commit SHA: ${COMMIT_SHA}"
-  echo "Processes: ${PROCESSES}"
   echo "Destination: ${DESTINATION}"
+  echo "Document packages: ${DOCUMENT_PACKAGES}"
+  echo "Full sync: ${FULL_SYNC}"
+  echo "Commit reference: ${COMMIT_REFERENCE}"
+  echo "Processes: ${PROCESSES}"
+  set -x
   if [[ "${DESTINATION}"  == "live" ]]; then
  echo 
"destination-location=s3://live-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   else
  echo 
"destination-location=s3://staging-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   fi
+  if [[ "${FULL_SYNC}" == "true" ]] ; then
+echo "sync-type=full-sync" >> ${GITHUB_OUTPUT}
+  else
+echo "sync-type=single-commit" >> ${GITHUB_OUTPUT}
+  fi
+  echo "commit-ref=${{ inputs.full-sync && '' || env.COMMIT_REFERENCE 
}}" >> ${GITHUB_OUTPUT}
 
   - uses: actions/checkout@v4
 # Checkout only workflow and scripts directory to run scripts from
@@ -121,53 +125,53 @@ jobs:
   sudo chown -R "${USER}" /mnt/cloned-airflow-site-archive
   ln -v -s /mnt/cloned-airflow-site-archive 
./cloned-airflow-site-archive
 
-  - name: Pre-process docs folders
+  - name: Pre-process docs packages
 env:
-  DOCUMENTS_FOLDERS: ${{ inputs.document-folders }}
-id: docs-folders-processed
+  DOCUMENTS_PACKAGES: ${{ inputs.document-packages }}
+id: docs-p

(airflow-site-archive) 01/01: Add consistency changes for the syncing scripts and workflows

2025-05-15 Thread potiuk
This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch consistency-changes-for-syncing-scripts
in repository https://gitbox.apache.org/repos/asf/airflow-site-archive.git

commit 54521b92f874fea180e299c48e0bd4dc705b74e3
Author: Jarek Potiuk 
AuthorDate: Thu May 15 22:44:25 2025 -0400

Add consistency changes for the syncing scripts and workflows

* folders -> packages (which is consistent with breeze naming)
* more logical parameters for GH -> S3 sync (full sync boolean,
  better sequence of parameters)
* commit-sha -> commit-ref (because it might not be only a SHA)
* consistent plural in document_packages
* updated README with added information about workflows and
  --help produced by running --help
---
 .github/workflows/github-to-s3.yml | 114 +++--
 .github/workflows/s3-to-github.yml |  54 +-
 README.md  |  78 -
 scripts/github_to_s3.py|  83 ++-
 scripts/s3_to_github.py|  36 ++--
 scripts/transfer_utils.py  |  42 +++---
 6 files changed, 219 insertions(+), 188 deletions(-)

diff --git a/.github/workflows/github-to-s3.yml 
b/.github/workflows/github-to-s3.yml
index e02de60a30..17ea8cd229 100644
--- a/.github/workflows/github-to-s3.yml
+++ b/.github/workflows/github-to-s3.yml
@@ -28,23 +28,20 @@ on: # yamllint disable-line rule:truthy
   - live
   - staging
 default: live
-  sync-type:
-description: "Perform a full sync or just sync the last commit"
+  document-packages:
+description: "Packages (long or short) separated with spaces"
 required: false
-default: "single_commit"
-type: choice
-options:
-  - single_commit
-  - full_sync
-  commit-sha:
-description: "For single-commit - SHA/tag/branch (default: latest in 
selected branch)"
-required: false
-default: ""
+default: "all"
 type: string
-  document-folders:
-description: "For full-sync, you can select which packages to upload - 
space separated"
+  full-sync:
+description: "If specified, whole repo will be synced (not only single 
commit)."
 required: false
-default: "all"
+default: false
+type: boolean
+  commit-reference:
+description: "Commit used to sync if np full-sync (default HEAD of 
branch chosen)."
+required: false
+default: "HEAD"
 type: string
   processes:
 description: "Number of processes to use for syncing"
@@ -59,24 +56,31 @@ jobs:
   - name: Summarize parameters
 id: parameters
 env:
-  DOCUMENT_FOLDERS: ${{ inputs.document-folders }}
-  SYNC_TYPE: ${{ inputs.sync-type }}
-  COMMIT_SHA: ${{ inputs.commit-sha }}
+  DOCUMENT_PACKAGES: ${{ inputs.document-packages }}
   PROCESSES: ${{ inputs.processes }}
   DESTINATION: ${{ inputs.destination }}}
+  COMMIT_REFERENCE: ${{ inputs.commit-reference }}
+  FULL_SYNC: ${{ inputs.full-sync }}
 run: |
   echo "Input parameters summary"
   echo "="
-  echo "Document folders: ${DOCUMENT_FOLDER}"
-  echo "Sync type: ${SYNC_TYPE}"
-  echo "Commit SHA: ${COMMIT_SHA}"
-  echo "Processes: ${PROCESSES}"
   echo "Destination: ${DESTINATION}"
+  echo "Document packages: ${DOCUMENT_PACKAGES}"
+  echo "Full sync: ${FULL_SYNC}"
+  echo "Commit reference: ${COMMIT_REFERENCE}"
+  echo "Processes: ${PROCESSES}"
+  set -x
   if [[ "${DESTINATION}"  == "live" ]]; then
  echo 
"destination-location=s3://live-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   else
  echo 
"destination-location=s3://staging-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   fi
+  if [[ "${FULL_SYNC}" == "true" ]] ; then
+echo "sync-type=full-sync" >> ${GITHUB_OUTPUT}
+  else
+echo "sync-type=single-commit" >> ${GITHUB_OUTPUT}
+  fi
+  echo "commit-ref=${{ inputs.full-sync && '' || env.COMMIT_REFERENCE 
}}" >> ${GITHUB_OUTPUT}
 
   - uses: actions/checkout@v4
 # Checkout only workflow and scripts directory to run scripts from
@@ -121,53 +125,53 @@ jobs:
   sudo chown -R "${USER}" /mnt/cloned-airflow-site-archive
   ln -v -s /mnt/cloned-airflow-site-archive 
./cloned-airflow-site-archive
 
-  - name: Pre-process docs folders
+  - name: Pre-process docs packages
 env:
-  DOCUMENTS_FOLDERS: ${{ inputs.document-folders }}
-id: docs-folders-processed
+  DOCUMENTS_PACKAGES: ${{ inputs.document-packages }}
+id: docs-packages-processed
 run: |
   ech

(airflow-site-archive) 01/01: Add consistency changes for the syncing scripts and workflows

2025-05-15 Thread potiuk
This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch consistency-changes-for-syncing-scripts
in repository https://gitbox.apache.org/repos/asf/airflow-site-archive.git

commit 293da9fbd50a45b6e9a4c8816582d851230d3e9b
Author: Jarek Potiuk 
AuthorDate: Thu May 15 22:44:25 2025 -0400

Add consistency changes for the syncing scripts and workflows

* folders -> packages (which is consistent with breeze naming)
* more logical parameters for GH -> S3 sync (full sync boolean,
  better sequence of parameters)
* commit-sha -> commit-ref (because it might not be only a SHA)
* consistent plural in document_packages
* updated README with added information about workflows and
  --help produced by running --help
---
 .github/workflows/github-to-s3.yml | 114 +++--
 .github/workflows/s3-to-github.yml |  54 +-
 README.md  |  78 -
 scripts/github_to_s3.py|  83 ++-
 scripts/s3_to_github.py|  36 ++--
 scripts/transfer_utils.py  |  42 +++---
 6 files changed, 219 insertions(+), 188 deletions(-)

diff --git a/.github/workflows/github-to-s3.yml 
b/.github/workflows/github-to-s3.yml
index e02de60a30..385248eb39 100644
--- a/.github/workflows/github-to-s3.yml
+++ b/.github/workflows/github-to-s3.yml
@@ -28,23 +28,20 @@ on: # yamllint disable-line rule:truthy
   - live
   - staging
 default: live
-  sync-type:
-description: "Perform a full sync or just sync the last commit"
+  document-packages:
+description: "Packages (long or short) separated with spaces"
 required: false
-default: "single_commit"
-type: choice
-options:
-  - single_commit
-  - full_sync
-  commit-sha:
-description: "For single-commit - SHA/tag/branch (default: latest in 
selected branch)"
-required: false
-default: ""
+default: "all"
 type: string
-  document-folders:
-description: "For full-sync, you can select which packages to upload - 
space separated"
+  full-sync:
+description: "If specified, whole repo will be synced (not only single 
commit)."
 required: false
-default: "all"
+default: false
+type: boolean
+  commit-reference:
+description: "Commit used to sync if np full-sync (default HEAD of 
branch chosen)."
+required: false
+default: "HEAD"
 type: string
   processes:
 description: "Number of processes to use for syncing"
@@ -59,24 +56,31 @@ jobs:
   - name: Summarize parameters
 id: parameters
 env:
-  DOCUMENT_FOLDERS: ${{ inputs.document-folders }}
-  SYNC_TYPE: ${{ inputs.sync-type }}
-  COMMIT_SHA: ${{ inputs.commit-sha }}
+  DOCUMENT_PACKAGES: ${{ inputs.document-packages }}
   PROCESSES: ${{ inputs.processes }}
   DESTINATION: ${{ inputs.destination }}}
+  COMMIT_REFERENCE: ${{ inputs.commit-reference }}
+  FULL_SYNC: ${{ inputs.full-sync }}
 run: |
   echo "Input parameters summary"
   echo "="
-  echo "Document folders: ${DOCUMENT_FOLDER}"
-  echo "Sync type: ${SYNC_TYPE}"
-  echo "Commit SHA: ${COMMIT_SHA}"
-  echo "Processes: ${PROCESSES}"
   echo "Destination: ${DESTINATION}"
+  echo "Document packages: ${DOCUMENT_PACKAGES}"
+  echo "Full sync: ${FULL_SYNC}"
+  echo "Commit reference: ${COMMIT_REFERENCE}"
+  echo "Processes: ${PROCESSES}"
+  set -x
   if [[ "${DESTINATION}"  == "live" ]]; then
  echo 
"destination-location=s3://live-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   else
  echo 
"destination-location=s3://staging-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   fi
+  if [[ "${FULL_SYNC}" == "true" ]] ; then
+echo "sync-type=full-sync" >> ${GITHUB_OUTPUT}
+  else
+echo "sync-type=single-commit" >> ${GITHUB_OUTPUT}
+  fi
+  echo "commit-ref=${{ input.full-sync && '' || env.COMMIT_REFERENCE 
}}" >> ${GITHUB_OUTPUT}
 
   - uses: actions/checkout@v4
 # Checkout only workflow and scripts directory to run scripts from
@@ -121,53 +125,53 @@ jobs:
   sudo chown -R "${USER}" /mnt/cloned-airflow-site-archive
   ln -v -s /mnt/cloned-airflow-site-archive 
./cloned-airflow-site-archive
 
-  - name: Pre-process docs folders
+  - name: Pre-process docs packages
 env:
-  DOCUMENTS_FOLDERS: ${{ inputs.document-folders }}
-id: docs-folders-processed
+  DOCUMENTS_PACKAGES: ${{ inputs.document-packages }}
+id: docs-packages-processed
 run: |
   echo

(airflow-site-archive) 01/01: Add consistency changes for the syncing scripts and workflows

2025-05-15 Thread potiuk
This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch consistency-changes-for-syncing-scripts
in repository https://gitbox.apache.org/repos/asf/airflow-site-archive.git

commit 320a3712dc51e21b5388a1adbd40a7b9a4c10ad6
Author: Jarek Potiuk 
AuthorDate: Thu May 15 22:44:25 2025 -0400

Add consistency changes for the syncing scripts and workflows

* folders -> packages (which is consistent with breeze naming)
* more logical parameters for GH -> S3 sync (full sync boolean,
  better sequence of parameters)
* commit-sha -> commit-ref (because it might not be only a SHA)
---
 .github/workflows/github-to-s3.yml | 114 +++--
 .github/workflows/s3-to-github.yml |  54 +-
 README.md  |  39 -
 scripts/github_to_s3.py|  81 +-
 scripts/s3_to_github.py|  36 ++--
 scripts/transfer_utils.py  |  42 +++---
 6 files changed, 176 insertions(+), 190 deletions(-)

diff --git a/.github/workflows/github-to-s3.yml 
b/.github/workflows/github-to-s3.yml
index e02de60a30..385248eb39 100644
--- a/.github/workflows/github-to-s3.yml
+++ b/.github/workflows/github-to-s3.yml
@@ -28,23 +28,20 @@ on: # yamllint disable-line rule:truthy
   - live
   - staging
 default: live
-  sync-type:
-description: "Perform a full sync or just sync the last commit"
+  document-packages:
+description: "Packages (long or short) separated with spaces"
 required: false
-default: "single_commit"
-type: choice
-options:
-  - single_commit
-  - full_sync
-  commit-sha:
-description: "For single-commit - SHA/tag/branch (default: latest in 
selected branch)"
-required: false
-default: ""
+default: "all"
 type: string
-  document-folders:
-description: "For full-sync, you can select which packages to upload - 
space separated"
+  full-sync:
+description: "If specified, whole repo will be synced (not only single 
commit)."
 required: false
-default: "all"
+default: false
+type: boolean
+  commit-reference:
+description: "Commit used to sync if np full-sync (default HEAD of 
branch chosen)."
+required: false
+default: "HEAD"
 type: string
   processes:
 description: "Number of processes to use for syncing"
@@ -59,24 +56,31 @@ jobs:
   - name: Summarize parameters
 id: parameters
 env:
-  DOCUMENT_FOLDERS: ${{ inputs.document-folders }}
-  SYNC_TYPE: ${{ inputs.sync-type }}
-  COMMIT_SHA: ${{ inputs.commit-sha }}
+  DOCUMENT_PACKAGES: ${{ inputs.document-packages }}
   PROCESSES: ${{ inputs.processes }}
   DESTINATION: ${{ inputs.destination }}}
+  COMMIT_REFERENCE: ${{ inputs.commit-reference }}
+  FULL_SYNC: ${{ inputs.full-sync }}
 run: |
   echo "Input parameters summary"
   echo "="
-  echo "Document folders: ${DOCUMENT_FOLDER}"
-  echo "Sync type: ${SYNC_TYPE}"
-  echo "Commit SHA: ${COMMIT_SHA}"
-  echo "Processes: ${PROCESSES}"
   echo "Destination: ${DESTINATION}"
+  echo "Document packages: ${DOCUMENT_PACKAGES}"
+  echo "Full sync: ${FULL_SYNC}"
+  echo "Commit reference: ${COMMIT_REFERENCE}"
+  echo "Processes: ${PROCESSES}"
+  set -x
   if [[ "${DESTINATION}"  == "live" ]]; then
  echo 
"destination-location=s3://live-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   else
  echo 
"destination-location=s3://staging-docs-airflow-apache-org/docs/" >> 
${GITHUB_OUTPUT}
   fi
+  if [[ "${FULL_SYNC}" == "true" ]] ; then
+echo "sync-type=full-sync" >> ${GITHUB_OUTPUT}
+  else
+echo "sync-type=single-commit" >> ${GITHUB_OUTPUT}
+  fi
+  echo "commit-ref=${{ input.full-sync && '' || env.COMMIT_REFERENCE 
}}" >> ${GITHUB_OUTPUT}
 
   - uses: actions/checkout@v4
 # Checkout only workflow and scripts directory to run scripts from
@@ -121,53 +125,53 @@ jobs:
   sudo chown -R "${USER}" /mnt/cloned-airflow-site-archive
   ln -v -s /mnt/cloned-airflow-site-archive 
./cloned-airflow-site-archive
 
-  - name: Pre-process docs folders
+  - name: Pre-process docs packages
 env:
-  DOCUMENTS_FOLDERS: ${{ inputs.document-folders }}
-id: docs-folders-processed
+  DOCUMENTS_PACKAGES: ${{ inputs.document-packages }}
+id: docs-packages-processed
 run: |
   echo "sparse-checkout<> ${GITHUB_OUTPUT}
-  if [[ "${DOCUMENTS_FOLDERS}" != "all" ]]; then
-echo "Preprocessing docs folders: ${DOCUMENTS_FOLDERS