This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch change-free-type-to-choice
in repository https://gitbox.apache.org/repos/asf/airflow-site-archive.git

commit 07cc55aa17b03bc924ddfdd93d10ff4d805ada0d
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Fri May 9 19:55:17 2025 +0200

    Small improvements in github to s3 workflow
---
 .github/workflows/github-to-s3.yml | 23 +++++++++++++++++------
 scripts/github_to_s3.py            | 26 ++++++++++++++++----------
 scripts/transfer_utils.py          |  2 +-
 3 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/github-to-s3.yml b/.github/workflows/github-to-s3.yml
index f92e69e300..f81fa56b07 100644
--- a/.github/workflows/github-to-s3.yml
+++ b/.github/workflows/github-to-s3.yml
@@ -33,16 +33,21 @@ on: # yamllint disable-line rule:truthy
       document-folder:
         description: "Provide any specific package document folder to sync"
         required: false
-        default: "NO_DOCS"
+        default: ""
         type: string
       sync-type:
         description: "Perform a full sync or just sync the last commit"
         required: false
-        default: "last_commit"
+        default: "single_commit"
         type: choice
         options:
-          - last_commit
+          - single_commit
           - full_sync
+      commit-sha:
+        description: "If specified, commit SHA used for single_commit (default is latest commit)"
+        required: false
+        default: ""
+        type: string
       processes:
         description: "Number of processes to use for syncing"
         required: false
@@ -87,13 +92,19 @@ jobs:
         run: |
           git clone --depth 2 https://github.com/apache/airflow-site-archive.git /mnt/airflow-site-archive
 
-      - name: "Syncing ${{ github.ref }} ( ${{ github.sha }} )"
+      - name: "Syncing ( ${{ inputs.commit-sha || github.sha }} ): ${{ inputs.sync-type }} ${{inputs.document-folder}}"
         env:
-          COMMIT_SHA: ${{ github.sha }}
+          COMMIT_SHA: ${{ inputs.commit-sha || github.sha }}
+          SYNC_TYPE: ${{ inputs.sync-type }}
         run: |
+          if [[ "${SYNC_TYPE}" == "single_commit" ]]; then
+            echo "Syncing ${COMMIT_SHA}"
+          else
+            echo "Syncing whole repo"
+          fi
           ls -la
           python3 -m pip install uv
           uv run ./scripts/github_to_s3.py --bucket-path ${{inputs.destination-location}} --local-path ${{inputs.local-path}} \
-          --document-folder ${{inputs.document-folder}} --commit-sha ${COMMIT_SHA} --sync-type ${{ inputs.sync-type }} \
+          --document-folder ${{ inputs.document-folder || 'NO_DOCS' }} --commit-sha ${COMMIT_SHA} --sync-type ${{ inputs.sync-type }} \
           --processes ${{ inputs.processes }}
         working-directory: /mnt/airflow-site-archive
diff --git a/scripts/github_to_s3.py b/scripts/github_to_s3.py
index 3f00f4d8cc..7760d74334 100644
--- a/scripts/github_to_s3.py
+++ b/scripts/github_to_s3.py
@@ -40,7 +40,7 @@ class GithubToS3(CommonTransferUtils):
         super().__init__(bucket, local_path)
 
     @staticmethod
-    def fetch_last_commit_files(commit_sha, diff_filter="ACM"):
+    def fetch_commit_files(commit_sha, diff_filter="ACM"):
         console.print(f"[blue] Fetching files from last commit {commit_sha} [/]")
         cmd = [
             "git",
@@ -61,7 +61,7 @@ class GithubToS3(CommonTransferUtils):
             return []
         return result.stdout.splitlines() if result.stdout else []
 
-    def sync_last_commit_files(self, commit_sha: str, processes: int):
+    def sync_single_commit_files(self, commit_sha: str, processes: int):
         '''
         There are two parts here.
         1. When any file gets removed under docs folder, we will remove from target location
@@ -70,8 +70,8 @@ class GithubToS3(CommonTransferUtils):
         # Fetching `d` excludes deleted files
         # Fetching `D` includes deleted files
 
-        files_cp_required = self.fetch_last_commit_files(commit_sha, diff_filter="d")
-        files_del_required = self.fetch_last_commit_files(commit_sha, diff_filter="D")
+        files_cp_required = self.fetch_commit_files(commit_sha, diff_filter="d")
+        files_del_required = self.fetch_commit_files(commit_sha, diff_filter="D")
 
         files_cp_required_under_docs = [f for f in files_cp_required if f.startswith("docs-archive/")]
         files_del_required_required_under_docs = [f for f in files_del_required if f.startswith("docs-archive/")]
@@ -102,15 +102,19 @@ class GithubToS3(CommonTransferUtils):
 
         self.run_with_pool(self.sync, pool_args, processes=processes)
 
+def convert_short_name_to_folder_name(short_name: str):
+    if not short_name.startswith("apache-airflow-providers-"):
+        return f"apache-airflow-providers-{short_name.replace('.', '-')}"
+    return short_name
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Sync GitHub to S3")
     parser.add_argument("--bucket-path", required=True, help="S3 bucket name with path")
     parser.add_argument("--local-path", required=True, help="local path to sync")
-    parser.add_argument("--document-folder", help="Document folder to sync", default="")
+    parser.add_argument("--document-folder", help="Document folder to sync (or short provider-id)", default="")
     parser.add_argument("--commit-sha", help="Commit SHA to sync", default="")
-    parser.add_argument("--sync-type", help="Sync type", default="last_commit")
+    parser.add_argument("--sync-type", help="Sync type", default="single_commit")
     parser.add_argument("--processes", help="Number of processes", type=int, default=8)
 
     args = parser.parse_args()
@@ -122,19 +126,21 @@ if __name__ == "__main__":
 
     if document_folder and document_folder != "NO_DOCS":
         full_local_path = Path(f"{args.local_path}/{document_folder}")
+        if not full_local_path.exists():
+            full_local_path = Path(f"{args.local_path}/{convert_short_name_to_folder_name(document_folder)}")
         if full_local_path.exists():
             console.print(f"[blue] Document folder {document_folder} exists in bucket {args.bucket_path}.[/]")
 
             destination = f"s3://{syncer.bucket_name}/{syncer.prefix}".rstrip("/") + "/" + document_folder
-            syncer.sync(source=full_local_path, destination=destination)
+            syncer.sync(source=full_local_path.as_posix(), destination=destination)
             sys.exit(0)
         else:
-            console.print(f"[red] Document folder {document_folder} does not exist in github {args.local_path}.[/]")
+            console.print(f"[red] Document folder {full_local_path} does not exist.[/]")
             sys.exit(1)
 
-    if args.sync_type == "last_commit" and args.commit_sha:
+    if args.sync_type == "single_commit" and args.commit_sha:
         console.print(f"[blue] Syncing last commit {args.commit_sha} from {args.local_path} [/]")
-        syncer.sync_last_commit_files(args.commit_sha, processes=int(args.processes))
+        syncer.sync_single_commit_files(args.commit_sha, processes=int(args.processes))
         sys.exit(0)
 
     if args.sync_type == "full_sync":
diff --git a/scripts/transfer_utils.py b/scripts/transfer_utils.py
index 132ff00bd2..bf73d0f6dd 100644
--- a/scripts/transfer_utils.py
+++ b/scripts/transfer_utils.py
@@ -63,7 +63,7 @@ class CommonTransferUtils:
             console.print(f"[yellow] Error: {e}[/]")
             return []
 
-    def sync(self, source, destination):
+    def sync(self, source: str, destination: str):
 
         console.print(f"[blue] Syncing {source} to {destination} [/]")
 

Reply via email to