tvalentyn commented on a change in pull request #12150:
URL: https://github.com/apache/beam/pull/12150#discussion_r449178144



##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]
+  runs = []
+  for event in event_types:
+    data = request_url(
+        url,
+        params={
+            "event": event, "branch": RELEASE_BRANCH
+        },
+    )
+    runs.extend(data["workflow_runs"])
+
+  filtered_commit_runs = list(
+      filter(lambda w: w.get("head_sha", "") == RELEASE_COMMIT, runs))
+  if not filtered_commit_runs:
+    workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+        repo_url=REPO_URL, workflow_id=workflow_id)
+    raise Exception(
+        f"No runs for workflow (branch {RELEASE_BRANCH}, commit 
{RELEASE_COMMIT}). Verify at {workflow_web_url}"
+    )
+
+  sorted_runs = sorted(
+      filtered_commit_runs,
+      key=lambda w: dateutil.parser.parse(w["created_at"]),
+      reverse=True,
+  )
+  last_run = sorted_runs[0]
+  print(
+      f"Found last run. SHA: {RELEASE_COMMIT}, created_at: 
'{last_run['created_at']}', id: {last_run['id']}"
+  )
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=last_run["id"])
+  print(f"Verify at {workflow_web_url}")
+  print(
+      f"Optional upload to GCS will be available at:\n"
+      
f"\tgs://beam-wheels-staging/{RELEASE_BRANCH}/{RELEASE_COMMIT}-{workflow_id}/"
+  )
+  return last_run
+
+
+def validate_run(run_data):
+  status = run_data["status"]
+  conclusion = run_data["conclusion"]
+  if status == "completed" and conclusion == "success":
+    return run_data
+
+  url = run_data["url"]
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=run_data["id"])
+  print(
+      f"Started waiting for Workflow run {run_data['id']} to finish. Check on 
{workflow_web_url}"
+  )
+  start_time = time.time()
+  last_request = start_time
+  spinner = itertools.cycle(["|", "/", "-", "\\"])
+
+  while True:
+    now = time.time()
+    elapsed_time = time.strftime("%H:%M:%S", time.gmtime(now - start_time))
+    print(
+        f"\r {next(spinner)} Waiting to finish. Elapsed time: {elapsed_time}. "
+        f"Current state: status: `{status}`, conclusion: `{conclusion}`.",
+        end="",
+    )
+
+    time.sleep(0.3)
+    if (now - last_request) > 10:
+      last_request = now
+      run_data = request_url(url)
+      status = run_data["status"]
+      conclusion = run_data["conclusion"]
+      if status != "completed":
+        continue
+      elif conclusion == "success":
+        print(
+            f"\rFinished in: {elapsed_time}. "
+            f"Last state: status: `{status}`, conclusion: `{conclusion}`.",
+        )
+        return run_data
+      else:
+        print("\r")
+        raise Exception(
+            f"Run unsuccessful. Conclusion: {conclusion}. Payload: {run_data}")
+
+
+def reset_directory():
+  question = (
+      f"Artifacts directory will be cleared. Is it OK for you?\n"

Review comment:
       How about:
   "Creating Artifacts directory. Any existing content in {ARTIFACTS_DIR} will 
be erased. Proceed?" 

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]
+  runs = []
+  for event in event_types:
+    data = request_url(
+        url,
+        params={
+            "event": event, "branch": RELEASE_BRANCH
+        },
+    )
+    runs.extend(data["workflow_runs"])
+
+  filtered_commit_runs = list(
+      filter(lambda w: w.get("head_sha", "") == RELEASE_COMMIT, runs))
+  if not filtered_commit_runs:
+    workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+        repo_url=REPO_URL, workflow_id=workflow_id)
+    raise Exception(
+        f"No runs for workflow (branch {RELEASE_BRANCH}, commit 
{RELEASE_COMMIT}). Verify at {workflow_web_url}"
+    )
+
+  sorted_runs = sorted(
+      filtered_commit_runs,
+      key=lambda w: dateutil.parser.parse(w["created_at"]),
+      reverse=True,
+  )
+  last_run = sorted_runs[0]
+  print(
+      f"Found last run. SHA: {RELEASE_COMMIT}, created_at: 
'{last_run['created_at']}', id: {last_run['id']}"
+  )
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=last_run["id"])
+  print(f"Verify at {workflow_web_url}")
+  print(
+      f"Optional upload to GCS will be available at:\n"
+      
f"\tgs://beam-wheels-staging/{RELEASE_BRANCH}/{RELEASE_COMMIT}-{workflow_id}/"
+  )
+  return last_run
+
+
+def validate_run(run_data):
+  status = run_data["status"]
+  conclusion = run_data["conclusion"]
+  if status == "completed" and conclusion == "success":
+    return run_data
+
+  url = run_data["url"]
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=run_data["id"])
+  print(
+      f"Started waiting for Workflow run {run_data['id']} to finish. Check on 
{workflow_web_url}"
+  )
+  start_time = time.time()
+  last_request = start_time
+  spinner = itertools.cycle(["|", "/", "-", "\\"])
+
+  while True:
+    now = time.time()
+    elapsed_time = time.strftime("%H:%M:%S", time.gmtime(now - start_time))
+    print(
+        f"\r {next(spinner)} Waiting to finish. Elapsed time: {elapsed_time}. "
+        f"Current state: status: `{status}`, conclusion: `{conclusion}`.",
+        end="",
+    )
+
+    time.sleep(0.3)
+    if (now - last_request) > 10:
+      last_request = now
+      run_data = request_url(url)
+      status = run_data["status"]
+      conclusion = run_data["conclusion"]
+      if status != "completed":
+        continue
+      elif conclusion == "success":
+        print(
+            f"\rFinished in: {elapsed_time}. "
+            f"Last state: status: `{status}`, conclusion: `{conclusion}`.",
+        )
+        return run_data
+      else:
+        print("\r")
+        raise Exception(
+            f"Run unsuccessful. Conclusion: {conclusion}. Payload: {run_data}")
+
+
+def reset_directory():
+  question = (
+      f"Artifacts directory will be cleared. Is it OK for you?\n"
+      f"Artifacts directory: {ARTIFACTS_DIR}\n"
+      f"Your answer")
+  if get_yes_or_no_answer(question):
+    print(f"Clearing directory: {ARTIFACTS_DIR}")
+    shutil.rmtree(ARTIFACTS_DIR, ignore_errors=True)
+    os.makedirs(ARTIFACTS_DIR)
+  else:
+    print("You said NO for clearing artifacts directory. Quitting ...")
+    quit(1)
+
+
+def download_artifacts(artifacts_url):
+  print("Starting downloading artifacts ... (it may take a while)")
+  data_artifacts = request_url(artifacts_url)
+  filtered_artifacts = [
+      a for a in data_artifacts["artifacts"] if (
+          a["name"].startswith("source_gztar_zip") or
+          a["name"].startswith("wheelhouse"))
+  ]
+  for artifact in filtered_artifacts:
+    url = artifact["archive_download_url"]
+    name = artifact["name"]
+    artifacts_size_mb = round(artifact["size_in_bytes"] / (1024 * 1024), 2)
+    print(
+        f"\tDownloading {name}.zip artifact (size: {artifacts_size_mb} 
megabytes)"
+    )
+    r = request_url(url, return_raw_request=True, allow_redirects=True)
+
+    with tempfile.NamedTemporaryFile(

Review comment:
       Are there any hashes of the artifacts to verify that the 
downloaded/extracted artifacts were not corrupted?

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):

Review comment:
       Also, we may as well always return JSON here (see the comment below 
suggesting that files be downloaded using streaming).

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]
+  runs = []
+  for event in event_types:
+    data = request_url(
+        url,
+        params={
+            "event": event, "branch": RELEASE_BRANCH
+        },
+    )
+    runs.extend(data["workflow_runs"])
+
+  filtered_commit_runs = list(
+      filter(lambda w: w.get("head_sha", "") == RELEASE_COMMIT, runs))
+  if not filtered_commit_runs:
+    workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+        repo_url=REPO_URL, workflow_id=workflow_id)
+    raise Exception(
+        f"No runs for workflow (branch {RELEASE_BRANCH}, commit 
{RELEASE_COMMIT}). Verify at {workflow_web_url}"
+    )
+
+  sorted_runs = sorted(
+      filtered_commit_runs,
+      key=lambda w: dateutil.parser.parse(w["created_at"]),
+      reverse=True,
+  )
+  last_run = sorted_runs[0]
+  print(
+      f"Found last run. SHA: {RELEASE_COMMIT}, created_at: 
'{last_run['created_at']}', id: {last_run['id']}"
+  )
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=last_run["id"])
+  print(f"Verify at {workflow_web_url}")
+  print(
+      f"Optional upload to GCS will be available at:\n"
+      
f"\tgs://beam-wheels-staging/{RELEASE_BRANCH}/{RELEASE_COMMIT}-{workflow_id}/"
+  )
+  return last_run
+
+
+def validate_run(run_data):
+  status = run_data["status"]
+  conclusion = run_data["conclusion"]
+  if status == "completed" and conclusion == "success":
+    return run_data
+
+  url = run_data["url"]
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=run_data["id"])
+  print(
+      f"Started waiting for Workflow run {run_data['id']} to finish. Check on 
{workflow_web_url}"
+  )
+  start_time = time.time()
+  last_request = start_time
+  spinner = itertools.cycle(["|", "/", "-", "\\"])
+
+  while True:
+    now = time.time()
+    elapsed_time = time.strftime("%H:%M:%S", time.gmtime(now - start_time))
+    print(
+        f"\r {next(spinner)} Waiting to finish. Elapsed time: {elapsed_time}. "
+        f"Current state: status: `{status}`, conclusion: `{conclusion}`.",
+        end="",
+    )
+
+    time.sleep(0.3)
+    if (now - last_request) > 10:
+      last_request = now
+      run_data = request_url(url)
+      status = run_data["status"]
+      conclusion = run_data["conclusion"]
+      if status != "completed":
+        continue
+      elif conclusion == "success":
+        print(
+            f"\rFinished in: {elapsed_time}. "
+            f"Last state: status: `{status}`, conclusion: `{conclusion}`.",
+        )
+        return run_data
+      else:
+        print("\r")
+        raise Exception(
+            f"Run unsuccessful. Conclusion: {conclusion}. Payload: {run_data}")
+
+
+def reset_directory():
+  question = (
+      f"Artifacts directory will be cleared. Is it OK for you?\n"
+      f"Artifacts directory: {ARTIFACTS_DIR}\n"
+      f"Your answer")
+  if get_yes_or_no_answer(question):
+    print(f"Clearing directory: {ARTIFACTS_DIR}")
+    shutil.rmtree(ARTIFACTS_DIR, ignore_errors=True)
+    os.makedirs(ARTIFACTS_DIR)
+  else:
+    print("You said NO for clearing artifacts directory. Quitting ...")
+    quit(1)
+
+
+def download_artifacts(artifacts_url):
+  print("Starting downloading artifacts ... (it may take a while)")
+  data_artifacts = request_url(artifacts_url)
+  filtered_artifacts = [
+      a for a in data_artifacts["artifacts"] if (
+          a["name"].startswith("source_gztar_zip") or
+          a["name"].startswith("wheelhouse"))
+  ]
+  for artifact in filtered_artifacts:
+    url = artifact["archive_download_url"]
+    name = artifact["name"]
+    artifacts_size_mb = round(artifact["size_in_bytes"] / (1024 * 1024), 2)
+    print(
+        f"\tDownloading {name}.zip artifact (size: {artifacts_size_mb} 
megabytes)"
+    )
+    r = request_url(url, return_raw_request=True, allow_redirects=True)
+
+    with tempfile.NamedTemporaryFile(
+        "wb",
+        prefix=name,
+        suffix=".zip",
+    ) as f:
+      f.write(r.content)
+
+      with zipfile.ZipFile(f.name, "r") as zip_ref:
+        print(f"\tUnzipping {len(zip_ref.filelist)} files")
+        zip_ref.extractall(ARTIFACTS_DIR)
+
+
+if __name__ == "__main__":
+  print(
+      "Starting script for download GitHub Actions artifacts for Build Wheels 
workflow"
+  )
+  parse_arguments()

Review comment:
       Can we print all the provided arguments and prompt the user to confirm 
whether they are correct before proceeding?

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]
+  runs = []
+  for event in event_types:
+    data = request_url(
+        url,
+        params={
+            "event": event, "branch": RELEASE_BRANCH
+        },
+    )
+    runs.extend(data["workflow_runs"])
+
+  filtered_commit_runs = list(
+      filter(lambda w: w.get("head_sha", "") == RELEASE_COMMIT, runs))
+  if not filtered_commit_runs:
+    workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+        repo_url=REPO_URL, workflow_id=workflow_id)
+    raise Exception(
+        f"No runs for workflow (branch {RELEASE_BRANCH}, commit 
{RELEASE_COMMIT}). Verify at {workflow_web_url}"
+    )
+
+  sorted_runs = sorted(
+      filtered_commit_runs,
+      key=lambda w: dateutil.parser.parse(w["created_at"]),
+      reverse=True,
+  )
+  last_run = sorted_runs[0]
+  print(
+      f"Found last run. SHA: {RELEASE_COMMIT}, created_at: 
'{last_run['created_at']}', id: {last_run['id']}"
+  )
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=last_run["id"])
+  print(f"Verify at {workflow_web_url}")
+  print(
+      f"Optional upload to GCS will be available at:\n"
+      
f"\tgs://beam-wheels-staging/{RELEASE_BRANCH}/{RELEASE_COMMIT}-{workflow_id}/"

Review comment:
       What does "optional" mean here?
   
   Should we say: "Wheels for this workflow are also available at: ..."? 
   If we need to support in-progress workflows, then we can print this once we 
have verified that the workflow has finished.

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]
+  runs = []
+  for event in event_types:
+    data = request_url(
+        url,
+        params={
+            "event": event, "branch": RELEASE_BRANCH
+        },
+    )
+    runs.extend(data["workflow_runs"])
+
+  filtered_commit_runs = list(
+      filter(lambda w: w.get("head_sha", "") == RELEASE_COMMIT, runs))
+  if not filtered_commit_runs:
+    workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+        repo_url=REPO_URL, workflow_id=workflow_id)
+    raise Exception(
+        f"No runs for workflow (branch {RELEASE_BRANCH}, commit 
{RELEASE_COMMIT}). Verify at {workflow_web_url}"
+    )
+
+  sorted_runs = sorted(
+      filtered_commit_runs,
+      key=lambda w: dateutil.parser.parse(w["created_at"]),
+      reverse=True,
+  )
+  last_run = sorted_runs[0]
+  print(
+      f"Found last run. SHA: {RELEASE_COMMIT}, created_at: 
'{last_run['created_at']}', id: {last_run['id']}"
+  )
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=last_run["id"])
+  print(f"Verify at {workflow_web_url}")
+  print(
+      f"Optional upload to GCS will be available at:\n"
+      
f"\tgs://beam-wheels-staging/{RELEASE_BRANCH}/{RELEASE_COMMIT}-{workflow_id}/"
+  )
+  return last_run
+
+
+def validate_run(run_data):
+  status = run_data["status"]
+  conclusion = run_data["conclusion"]
+  if status == "completed" and conclusion == "success":
+    return run_data
+
+  url = run_data["url"]
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=run_data["id"])
+  print(
+      f"Started waiting for Workflow run {run_data['id']} to finish. Check on 
{workflow_web_url}"
+  )
+  start_time = time.time()
+  last_request = start_time
+  spinner = itertools.cycle(["|", "/", "-", "\\"])
+
+  while True:
+    now = time.time()
+    elapsed_time = time.strftime("%H:%M:%S", time.gmtime(now - start_time))
+    print(
+        f"\r {next(spinner)} Waiting to finish. Elapsed time: {elapsed_time}. "
+        f"Current state: status: `{status}`, conclusion: `{conclusion}`.",
+        end="",
+    )
+
+    time.sleep(0.3)
+    if (now - last_request) > 10:
+      last_request = now
+      run_data = request_url(url)
+      status = run_data["status"]
+      conclusion = run_data["conclusion"]
+      if status != "completed":
+        continue
+      elif conclusion == "success":
+        print(
+            f"\rFinished in: {elapsed_time}. "
+            f"Last state: status: `{status}`, conclusion: `{conclusion}`.",
+        )
+        return run_data
+      else:
+        print("\r")
+        raise Exception(
+            f"Run unsuccessful. Conclusion: {conclusion}. Payload: {run_data}")
+
+
+def reset_directory():
+  question = (
+      f"Artifacts directory will be cleared. Is it OK for you?\n"
+      f"Artifacts directory: {ARTIFACTS_DIR}\n"
+      f"Your answer")
+  if get_yes_or_no_answer(question):
+    print(f"Clearing directory: {ARTIFACTS_DIR}")
+    shutil.rmtree(ARTIFACTS_DIR, ignore_errors=True)
+    os.makedirs(ARTIFACTS_DIR)
+  else:
+    print("You said NO for clearing artifacts directory. Quitting ...")
+    quit(1)

Review comment:
       Use `sys.exit()` rather than `quit()` in scripts; `quit()` is intended for the interactive interpreter. `sys.exit()` can also accept a message.

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):

Review comment:
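       Should this parameter be named `return_raw_response`? What it returns is the response object from `requests.get`, not the request.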

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]
+  runs = []
+  for event in event_types:
+    data = request_url(
+        url,
+        params={
+            "event": event, "branch": RELEASE_BRANCH
+        },
+    )
+    runs.extend(data["workflow_runs"])
+
+  filtered_commit_runs = list(
+      filter(lambda w: w.get("head_sha", "") == RELEASE_COMMIT, runs))
+  if not filtered_commit_runs:
+    workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+        repo_url=REPO_URL, workflow_id=workflow_id)
+    raise Exception(
+        f"No runs for workflow (branch {RELEASE_BRANCH}, commit 
{RELEASE_COMMIT}). Verify at {workflow_web_url}"
+    )
+
+  sorted_runs = sorted(
+      filtered_commit_runs,
+      key=lambda w: dateutil.parser.parse(w["created_at"]),
+      reverse=True,
+  )
+  last_run = sorted_runs[0]
+  print(
+      f"Found last run. SHA: {RELEASE_COMMIT}, created_at: 
'{last_run['created_at']}', id: {last_run['id']}"
+  )
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=last_run["id"])
+  print(f"Verify at {workflow_web_url}")
+  print(
+      f"Optional upload to GCS will be available at:\n"
+      
f"\tgs://beam-wheels-staging/{RELEASE_BRANCH}/{RELEASE_COMMIT}-{workflow_id}/"
+  )
+  return last_run
+
+
+def validate_run(run_data):
+  status = run_data["status"]
+  conclusion = run_data["conclusion"]
+  if status == "completed" and conclusion == "success":
+    return run_data
+
+  url = run_data["url"]
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=run_data["id"])
+  print(
+      f"Started waiting for Workflow run {run_data['id']} to finish. Check on 
{workflow_web_url}"
+  )
+  start_time = time.time()
+  last_request = start_time
+  spinner = itertools.cycle(["|", "/", "-", "\\"])
+
+  while True:
+    now = time.time()
+    elapsed_time = time.strftime("%H:%M:%S", time.gmtime(now - start_time))
+    print(
+        f"\r {next(spinner)} Waiting to finish. Elapsed time: {elapsed_time}. "
+        f"Current state: status: `{status}`, conclusion: `{conclusion}`.",
+        end="",
+    )
+
+    time.sleep(0.3)
+    if (now - last_request) > 10:
+      last_request = now
+      run_data = request_url(url)
+      status = run_data["status"]
+      conclusion = run_data["conclusion"]
+      if status != "completed":
+        continue
+      elif conclusion == "success":
+        print(
+            f"\rFinished in: {elapsed_time}. "
+            f"Last state: status: `{status}`, conclusion: `{conclusion}`.",
+        )
+        return run_data
+      else:
+        print("\r")
+        raise Exception(
+            f"Run unsuccessful. Conclusion: {conclusion}. Payload: {run_data}")
+
+
+def reset_directory():
+  question = (
+      f"Artifacts directory will be cleared. Is it OK for you?\n"
+      f"Artifacts directory: {ARTIFACTS_DIR}\n"
+      f"Your answer")
+  if get_yes_or_no_answer(question):
+    print(f"Clearing directory: {ARTIFACTS_DIR}")
+    shutil.rmtree(ARTIFACTS_DIR, ignore_errors=True)
+    os.makedirs(ARTIFACTS_DIR)
+  else:
+    print("You said NO for clearing artifacts directory. Quitting ...")
+    quit(1)
+
+
+def download_artifacts(artifacts_url):
+  print("Starting downloading artifacts ... (it may take a while)")
+  data_artifacts = request_url(artifacts_url)
+  filtered_artifacts = [
+      a for a in data_artifacts["artifacts"] if (
+          a["name"].startswith("source_gztar_zip") or
+          a["name"].startswith("wheelhouse"))
+  ]
+  for artifact in filtered_artifacts:
+    url = artifact["archive_download_url"]
+    name = artifact["name"]
+    artifacts_size_mb = round(artifact["size_in_bytes"] / (1024 * 1024), 2)
+    print(
+        f"\tDownloading {name}.zip artifact (size: {artifacts_size_mb} 
megabytes)"
+    )
+    r = request_url(url, return_raw_request=True, allow_redirects=True)

Review comment:
       I suggest downloading files in streaming mode:
   ```
       with requests.get(url, auth=...,  stream=True) as r:
           with tempfile.NamedTemporaryFile(...) as f:
               shutil.copyfileobj(r.raw, f)
   ```
   (you could make another helper for that if you prefer).

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]

Review comment:
       Why do we want to retrieve runs triggered by `pull_request` events? 
Shouldn't we consider only `push` runs?

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]
+  runs = []
+  for event in event_types:
+    data = request_url(
+        url,
+        params={
+            "event": event, "branch": RELEASE_BRANCH
+        },
+    )
+    runs.extend(data["workflow_runs"])
+
+  filtered_commit_runs = list(

Review comment:
       A list comprehension may be simpler to follow. How about:
   ```
   runs_for_release_commit = [
       r for r in runs if r.get("head_sha", "") == RELEASE_COMMIT
   ]
   ```

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]
+  runs = []
+  for event in event_types:
+    data = request_url(
+        url,
+        params={
+            "event": event, "branch": RELEASE_BRANCH
+        },
+    )
+    runs.extend(data["workflow_runs"])
+
+  filtered_commit_runs = list(
+      filter(lambda w: w.get("head_sha", "") == RELEASE_COMMIT, runs))
+  if not filtered_commit_runs:
+    workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+        repo_url=REPO_URL, workflow_id=workflow_id)
+    raise Exception(
+        f"No runs for workflow (branch {RELEASE_BRANCH}, commit 
{RELEASE_COMMIT}). Verify at {workflow_web_url}"
+    )
+
+  sorted_runs = sorted(
+      filtered_commit_runs,
+      key=lambda w: dateutil.parser.parse(w["created_at"]),
+      reverse=True,
+  )
+  last_run = sorted_runs[0]
+  print(
+      f"Found last run. SHA: {RELEASE_COMMIT}, created_at: 
'{last_run['created_at']}', id: {last_run['id']}"
+  )
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=last_run["id"])
+  print(f"Verify at {workflow_web_url}")
+  print(
+      f"Optional upload to GCS will be available at:\n"
+      
f"\tgs://beam-wheels-staging/{RELEASE_BRANCH}/{RELEASE_COMMIT}-{workflow_id}/"
+  )
+  return last_run
+
+
+def validate_run(run_data):
+  status = run_data["status"]
+  conclusion = run_data["conclusion"]
+  if status == "completed" and conclusion == "success":
+    return run_data
+
+  url = run_data["url"]
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=run_data["id"])
+  print(
+      f"Started waiting for Workflow run {run_data['id']} to finish. Check on 
{workflow_web_url}"
+  )
+  start_time = time.time()
+  last_request = start_time
+  spinner = itertools.cycle(["|", "/", "-", "\\"])
+
+  while True:
+    now = time.time()
+    elapsed_time = time.strftime("%H:%M:%S", time.gmtime(now - start_time))
+    print(
+        f"\r {next(spinner)} Waiting to finish. Elapsed time: {elapsed_time}. "
+        f"Current state: status: `{status}`, conclusion: `{conclusion}`.",
+        end="",
+    )
+
+    time.sleep(0.3)
+    if (now - last_request) > 10:
+      last_request = now
+      run_data = request_url(url)
+      status = run_data["status"]
+      conclusion = run_data["conclusion"]
+      if status != "completed":
+        continue
+      elif conclusion == "success":
+        print(
+            f"\rFinished in: {elapsed_time}. "
+            f"Last state: status: `{status}`, conclusion: `{conclusion}`.",
+        )
+        return run_data
+      else:
+        print("\r")
+        raise Exception(
+            f"Run unsuccessful. Conclusion: {conclusion}. Payload: {run_data}")
+
+
+def reset_directory():
+  question = (
+      f"Artifacts directory will be cleared. Is it OK for you?\n"
+      f"Artifacts directory: {ARTIFACTS_DIR}\n"
+      f"Your answer")
+  if get_yes_or_no_answer(question):
+    print(f"Clearing directory: {ARTIFACTS_DIR}")
+    shutil.rmtree(ARTIFACTS_DIR, ignore_errors=True)
+    os.makedirs(ARTIFACTS_DIR)
+  else:
+    print("You said NO for clearing artifacts directory. Quitting ...")
+    quit(1)
+
+
+def download_artifacts(artifacts_url):

Review comment:
       Please add a docstring.

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")

Review comment:
       Let's replace `Uhhhh... please enter` with `Please enter 'y' or 'n'`.

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]
+  runs = []
+  for event in event_types:
+    data = request_url(
+        url,
+        params={
+            "event": event, "branch": RELEASE_BRANCH
+        },
+    )
+    runs.extend(data["workflow_runs"])

Review comment:
       Here we are making an assumption that the response `data` will have a key 
`workflow_runs`. If it does not, the script will crash with a 
KeyError. It would be more helpful for debugging the script if we failed with a 
more meaningful error and let the user know that we sent request X and 
expected the reply to have fields Y, Z, ... but got ... instead. 
   
   The same comment applies to the other places where we use `request_url`.
   
   Consider adding a check for the expected output keys to the `request_url` function.

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):

Review comment:
       The script would be easier to follow if we explicitly state the 
parameters here instead of passing them as global vars:
   ```
   def get_last_run(workflow_id, release_branch_name, commit_hash)
   ```
   Also, instead of retrieving all data associated with the run and passing 
JSON objects of undefined structure between functions, consider using:
   
   ```
   def get_last_run_id(workflow_id, release_branch_name, commit_hash):
     """Retrieves the latest GitHub Actions run id for the specified commit on a 
branch."""
   
   def get_artifacts_url(run_id):
     """Returns the artifacts url associated with a GitHub Actions run."""
   
   def wait_for_run_completion(run_id):
     """Waits for the run to complete if it is in progress, and verifies it 
completed successfully."""
   ```
   If you want to save API calls, you can still nest helpers inside the functions 
that use them and access the API response from the outer function's context.

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";

Review comment:
       I think this is a run ID, not a workflow ID; let's not confuse the two.
   My understanding is that we have several workflows; one of them is "Build 
wheels", and it has a (probably fixed) ID.
   There are multiple runs in the "Build wheels" workflow, and each run has its 
own Run ID.

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]
+  runs = []
+  for event in event_types:
+    data = request_url(
+        url,
+        params={
+            "event": event, "branch": RELEASE_BRANCH
+        },
+    )
+    runs.extend(data["workflow_runs"])
+
+  filtered_commit_runs = list(
+      filter(lambda w: w.get("head_sha", "") == RELEASE_COMMIT, runs))
+  if not filtered_commit_runs:
+    workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+        repo_url=REPO_URL, workflow_id=workflow_id)
+    raise Exception(
+        f"No runs for workflow (branch {RELEASE_BRANCH}, commit 
{RELEASE_COMMIT}). Verify at {workflow_web_url}"
+    )
+
+  sorted_runs = sorted(
+      filtered_commit_runs,
+      key=lambda w: dateutil.parser.parse(w["created_at"]),
+      reverse=True,
+  )
+  last_run = sorted_runs[0]
+  print(
+      f"Found last run. SHA: {RELEASE_COMMIT}, created_at: 
'{last_run['created_at']}', id: {last_run['id']}"
+  )
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=last_run["id"])
+  print(f"Verify at {workflow_web_url}")
+  print(
+      f"Optional upload to GCS will be available at:\n"
+      
f"\tgs://beam-wheels-staging/{RELEASE_BRANCH}/{RELEASE_COMMIT}-{workflow_id}/"
+  )
+  return last_run
+
+
+def validate_run(run_data):
+  status = run_data["status"]
+  conclusion = run_data["conclusion"]
+  if status == "completed" and conclusion == "success":
+    return run_data
+
+  url = run_data["url"]
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=run_data["id"])
+  print(
+      f"Started waiting for Workflow run {run_data['id']} to finish. Check on 
{workflow_web_url}"
+  )
+  start_time = time.time()
+  last_request = start_time
+  spinner = itertools.cycle(["|", "/", "-", "\\"])
+
+  while True:
+    now = time.time()
+    elapsed_time = time.strftime("%H:%M:%S", time.gmtime(now - start_time))
+    print(
+        f"\r {next(spinner)} Waiting to finish. Elapsed time: {elapsed_time}. "
+        f"Current state: status: `{status}`, conclusion: `{conclusion}`.",
+        end="",
+    )
+
+    time.sleep(0.3)
+    if (now - last_request) > 10:
+      last_request = now
+      run_data = request_url(url)
+      status = run_data["status"]
+      conclusion = run_data["conclusion"]
+      if status != "completed":

Review comment:
       What other statuses besides 'completed' do we expect to support? 
It's better to be explicit:
   
          ...
          while status == "in-progress":
              # wait
          if status == "completed":
              # return artifacts_url
          else:
              # error
            

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):

Review comment:
       Also, please add a docstring.

##########
File path: release/src/main/scripts/download_github_actions_artifacts.py
##########
@@ -0,0 +1,234 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Script for downloading GitHub Actions artifacts from 'Build python wheels' 
workflow."""
+import argparse
+import itertools
+import os
+import shutil
+import tempfile
+import time
+import zipfile
+
+import dateutil.parser
+import requests
+
+GH_API_URL_WORKLOW_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/build_wheels.yml";
+)
+GH_API_URL_WORKFLOW_RUNS_FMT = (
+    
"https://api.github.com/repos/{repo_url}/actions/workflows/{workflow_id}/runs";
+)
+GH_WEB_URL_WORKLOW_RUN_FMT = 
"https://github.com/{repo_url}/actions/runs/{workflow_id}";
+
+
+def parse_arguments():
+  parser = argparse.ArgumentParser(
+      description=
+      "Script for downloading GitHub Actions artifacts from 'Build python 
wheels' workflow."
+  )
+  parser.add_argument("--github-token", required=True)
+  parser.add_argument("--github-user", required=True)
+  parser.add_argument("--repo-url", required=True)
+  parser.add_argument("--release-branch", required=True)
+  parser.add_argument("--release-commit", required=True)
+  parser.add_argument("--artifacts_dir", required=True)
+
+  args = parser.parse_args()
+
+  global GITHUB_TOKEN, USER_GITHUB_ID, REPO_URL, RELEASE_BRANCH, 
RELEASE_COMMIT, ARTIFACTS_DIR
+  GITHUB_TOKEN = args.github_token
+  USER_GITHUB_ID = args.github_user
+  REPO_URL = args.repo_url
+  RELEASE_BRANCH = args.release_branch
+  RELEASE_COMMIT = args.release_commit
+  ARTIFACTS_DIR = args.artifacts_dir
+
+
+def request_url(url, return_raw_request=False, *args, **kwargs):
+  """Helper function form making requests authorized by GitHub token"""
+  r = requests.get(url, *args, auth=("token", GITHUB_TOKEN), **kwargs)
+  r.raise_for_status()
+  if return_raw_request:
+    return r
+  return r.json()
+
+
+def get_yes_or_no_answer(question):
+  """Helper function to ask yes or no question"""
+  reply = str(input(question + " (y/n): ")).lower().strip()
+  if reply == "y":
+    return True
+  if reply == "n":
+    return False
+  else:
+    return get_yes_or_no_answer("Uhhhh... please enter ")
+
+
+def get_build_wheels_workflow_id():
+  url = GH_API_URL_WORKLOW_FMT.format(repo_url=REPO_URL)
+  data = request_url(url)
+  return data["id"]
+
+
+def get_last_run(workflow_id):
+  url = GH_API_URL_WORKFLOW_RUNS_FMT.format(
+      repo_url=REPO_URL, workflow_id=workflow_id)
+  event_types = ["push", "pull_request"]
+  runs = []
+  for event in event_types:
+    data = request_url(
+        url,
+        params={
+            "event": event, "branch": RELEASE_BRANCH
+        },
+    )
+    runs.extend(data["workflow_runs"])
+
+  filtered_commit_runs = list(
+      filter(lambda w: w.get("head_sha", "") == RELEASE_COMMIT, runs))
+  if not filtered_commit_runs:
+    workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+        repo_url=REPO_URL, workflow_id=workflow_id)
+    raise Exception(
+        f"No runs for workflow (branch {RELEASE_BRANCH}, commit 
{RELEASE_COMMIT}). Verify at {workflow_web_url}"
+    )
+
+  sorted_runs = sorted(
+      filtered_commit_runs,
+      key=lambda w: dateutil.parser.parse(w["created_at"]),
+      reverse=True,
+  )
+  last_run = sorted_runs[0]
+  print(
+      f"Found last run. SHA: {RELEASE_COMMIT}, created_at: 
'{last_run['created_at']}', id: {last_run['id']}"
+  )
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=last_run["id"])
+  print(f"Verify at {workflow_web_url}")
+  print(
+      f"Optional upload to GCS will be available at:\n"
+      
f"\tgs://beam-wheels-staging/{RELEASE_BRANCH}/{RELEASE_COMMIT}-{workflow_id}/"
+  )
+  return last_run
+
+
+def validate_run(run_data):
+  status = run_data["status"]
+  conclusion = run_data["conclusion"]
+  if status == "completed" and conclusion == "success":
+    return run_data
+
+  url = run_data["url"]
+  workflow_web_url = GH_WEB_URL_WORKLOW_RUN_FMT.format(
+      repo_url=REPO_URL, workflow_id=run_data["id"])
+  print(
+      f"Started waiting for Workflow run {run_data['id']} to finish. Check on 
{workflow_web_url}"
+  )
+  start_time = time.time()
+  last_request = start_time
+  spinner = itertools.cycle(["|", "/", "-", "\\"])
+
+  while True:
+    now = time.time()
+    elapsed_time = time.strftime("%H:%M:%S", time.gmtime(now - start_time))
+    print(
+        f"\r {next(spinner)} Waiting to finish. Elapsed time: {elapsed_time}. "
+        f"Current state: status: `{status}`, conclusion: `{conclusion}`.",
+        end="",
+    )
+
+    time.sleep(0.3)
+    if (now - last_request) > 10:
+      last_request = now
+      run_data = request_url(url)
+      status = run_data["status"]
+      conclusion = run_data["conclusion"]
+      if status != "completed":
+        continue
+      elif conclusion == "success":
+        print(
+            f"\rFinished in: {elapsed_time}. "
+            f"Last state: status: `{status}`, conclusion: `{conclusion}`.",
+        )
+        return run_data
+      else:
+        print("\r")
+        raise Exception(
+            f"Run unsuccessful. Conclusion: {conclusion}. Payload: {run_data}")
+
+
+def reset_directory():
+  question = (
+      f"Artifacts directory will be cleared. Is it OK for you?\n"
+      f"Artifacts directory: {ARTIFACTS_DIR}\n"
+      f"Your answer")
+  if get_yes_or_no_answer(question):
+    print(f"Clearing directory: {ARTIFACTS_DIR}")
+    shutil.rmtree(ARTIFACTS_DIR, ignore_errors=True)
+    os.makedirs(ARTIFACTS_DIR)
+  else:
+    print("You said NO for clearing artifacts directory. Quitting ...")
+    quit(1)
+
+
+def download_artifacts(artifacts_url):
+  print("Starting downloading artifacts ... (it may take a while)")
+  data_artifacts = request_url(artifacts_url)
+  filtered_artifacts = [
+      a for a in data_artifacts["artifacts"] if (
+          a["name"].startswith("source_gztar_zip") or
+          a["name"].startswith("wheelhouse"))
+  ]
+  for artifact in filtered_artifacts:
+    url = artifact["archive_download_url"]
+    name = artifact["name"]
+    artifacts_size_mb = round(artifact["size_in_bytes"] / (1024 * 1024), 2)
+    print(
+        f"\tDownloading {name}.zip artifact (size: {artifacts_size_mb} 
megabytes)"
+    )
+    r = request_url(url, return_raw_request=True, allow_redirects=True)
+
+    with tempfile.NamedTemporaryFile(

Review comment:
       If not, should we add the hashes when we build the wheels?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to