This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch v3-1-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v3-1-test by this push:
new 963d1b3e520 Cherry-pick-missing-release-validator-tests (#61793)
963d1b3e520 is described below
commit 963d1b3e52059c9796e6ab4b64d95a133ee827ff
Author: Jarek Potiuk <[email protected]>
AuthorDate: Thu Feb 12 00:16:59 2026 +0100
Cherry-pick-missing-release-validator-tests (#61793)
---
.../utils/airflow_release_validator.py | 597 ++++++++++++++++++++
.../src/airflow_breeze/utils/release_validator.py | 624 +++++++++++++++++++++
2 files changed, 1221 insertions(+)
diff --git a/dev/breeze/src/airflow_breeze/utils/airflow_release_validator.py
b/dev/breeze/src/airflow_breeze/utils/airflow_release_validator.py
new file mode 100644
index 00000000000..cbd46cc48f0
--- /dev/null
+++ b/dev/breeze/src/airflow_breeze/utils/airflow_release_validator.py
@@ -0,0 +1,597 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import filecmp
+import shutil
+import tarfile
+import time
+from pathlib import Path
+
+from airflow_breeze.utils.console import console_print
+from airflow_breeze.utils.release_validator import CheckType, ReleaseValidator, ValidationResult
+from airflow_breeze.utils.run_utils import run_command
+
+
+class AirflowReleaseValidator(ReleaseValidator):
+ """Validator for Apache Airflow release candidates."""
+
+ def __init__(
+ self,
+ version: str,
+ svn_path: Path,
+ airflow_repo_root: Path,
+ task_sdk_version: str | None = None,
+ download_gpg_keys: bool = False,
+ update_svn: bool = True,
+ verbose: bool = False,
+ ):
+ super().__init__(
+ version=version,
+ svn_path=svn_path,
+ airflow_repo_root=airflow_repo_root,
+ download_gpg_keys=download_gpg_keys,
+ update_svn=update_svn,
+ verbose=verbose,
+ )
+ self.task_sdk_version = task_sdk_version or version
+ self.version_without_rc = self._strip_rc_suffix(version)
+        self.task_sdk_version_without_rc = self._strip_rc_suffix(self.task_sdk_version)
+
+ @property
+ def expected_airflow_file_bases(self) -> list[str]:
+ return [
+ f"apache_airflow-{self.version_without_rc}-source.tar.gz",
+ f"apache_airflow-{self.version_without_rc}.tar.gz",
+ f"apache_airflow-{self.version_without_rc}-py3-none-any.whl",
+ f"apache_airflow_core-{self.version_without_rc}.tar.gz",
+ f"apache_airflow_core-{self.version_without_rc}-py3-none-any.whl",
+ ]
+
+ @property
+ def expected_task_sdk_file_bases(self) -> list[str]:
+ return [
+            f"apache_airflow_task_sdk-{self.task_sdk_version_without_rc}.tar.gz",
+            f"apache_airflow_task_sdk-{self.task_sdk_version_without_rc}-py3-none-any.whl",
+ ]
+
+ def get_distribution_name(self) -> str:
+ return "Apache Airflow"
+
+ def get_svn_directory(self) -> Path:
+ return self.svn_path / self.version
+
+ def get_task_sdk_svn_directory(self) -> Path:
+ return self.svn_path / "task-sdk" / self.task_sdk_version
+
+ def get_svn_directories(self) -> list[Path]:
+ """Return both Airflow and Task SDK SVN directories."""
+ return [self.get_svn_directory(), self.get_task_sdk_svn_directory()]
+
+ def get_expected_files(self) -> list[str]:
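+        """Return all expected Airflow artifact names, including signature companions.
+
+        Each base artifact name expands to a triple: the artifact itself plus
+        its ".asc" GPG signature and ".sha512" checksum file (for example,
+        "apache_airflow-3.1.0.tar.gz", "apache_airflow-3.1.0.tar.gz.asc" and
+        "apache_airflow-3.1.0.tar.gz.sha512").
+        """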
+ files = []
+ for base in self.expected_airflow_file_bases:
+ files.extend([base, f"{base}.asc", f"{base}.sha512"])
+
+ return files
+
+ def get_task_sdk_expected_files(self) -> list[str]:
+ files = []
+
+ for base in self.expected_task_sdk_file_bases:
+ files.extend([base, f"{base}.asc", f"{base}.sha512"])
+
+ return files
+
+ def validate_svn_files(self):
+ console_print("\n[bold]SVN File Verification[/bold]")
+ start_time = time.time()
+
+ airflow_svn_dir = self.get_svn_directory()
+ task_sdk_svn_dir = self.get_task_sdk_svn_directory()
+
+ console_print(f"Checking Airflow directory: {airflow_svn_dir}")
+ if not airflow_svn_dir.exists():
+ return ValidationResult(
+ check_type=CheckType.SVN,
+ passed=False,
+ message=f"Airflow SVN directory not found: {airflow_svn_dir}",
+ duration_seconds=time.time() - start_time,
+ )
+
+ console_print(f"Checking Task SDK directory: {task_sdk_svn_dir}")
+ if not task_sdk_svn_dir.exists():
+ return ValidationResult(
+ check_type=CheckType.SVN,
+ passed=False,
+                message=f"Task SDK SVN directory not found: {task_sdk_svn_dir}",
+ duration_seconds=time.time() - start_time,
+ )
+
+        actual_airflow = {f.name for f in airflow_svn_dir.iterdir() if f.is_file()}
+        expected_airflow = set(self.get_expected_files())
+        missing_airflow = expected_airflow - actual_airflow
+
+        actual_task_sdk = {f.name for f in task_sdk_svn_dir.iterdir() if f.is_file()}
+        expected_task_sdk = set(self.get_task_sdk_expected_files())
+        missing_task_sdk = expected_task_sdk - actual_task_sdk
+ expected_task_sdk = set(self.get_task_sdk_expected_files())
+ missing_task_sdk = expected_task_sdk - actual_task_sdk
+
+ details = []
+ if missing_airflow:
+ details.append(f"Missing {len(missing_airflow)} Airflow files:")
+ details.extend([f" - {f}" for f in sorted(missing_airflow)[:10]])
+ if missing_task_sdk:
+ details.append(f"Missing {len(missing_task_sdk)} Task SDK files:")
+ details.extend([f" - {f}" for f in sorted(missing_task_sdk)[:10]])
+
+ missing = list(missing_airflow) + list(missing_task_sdk)
+ total_expected = len(expected_airflow) + len(expected_task_sdk)
+ message = (
+            f"All {total_expected} expected files present"
+            if not missing
+            else f"Missing {len(missing)} files"
+ )
+
+ result = ValidationResult(
+ check_type=CheckType.SVN,
+ passed=not missing,
+ message=message,
+ details=details or None,
+ duration_seconds=time.time() - start_time,
+ )
+ self._print_result(result)
+ return result
+
+    def _compare_archives(self, built_file: Path, svn_file: Path) -> tuple[bool, list[str]]:
+        """Compare two archives by content.
+
+        Returns:
+            Tuple of (matches, diff_details) where diff_details lists what differs.
+ """
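+        # Wheels are zip archives: member lists and per-member CRCs are
+        # compared. Sdists are tar.gz archives, which store no per-member CRC,
+        # so member lists, sizes and symlink targets are compared instead.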
+ diff_details = []
+
+ if built_file.suffix == ".whl":
+ import zipfile
+
+ try:
+                with zipfile.ZipFile(built_file) as z1, zipfile.ZipFile(svn_file) as z2:
+                    n1 = set(z1.namelist())
+                    n2 = set(z2.namelist())
+                    only_in_built = n1 - n2
+                    only_in_svn = n2 - n1
+                    if only_in_built:
+                        diff_details.append(f"Only in built: {', '.join(sorted(only_in_built)[:5])}")
+                    if only_in_svn:
+                        diff_details.append(f"Only in SVN: {', '.join(sorted(only_in_svn)[:5])}")
+ for n in n1 & n2:
+ if z1.getinfo(n).CRC != z2.getinfo(n).CRC:
+ diff_details.append(f"Content differs: {n}")
+ return (not diff_details, diff_details)
+ except Exception as e:
+ return (False, [f"Error: {e}"])
+
+ elif built_file.suffix == ".gz": # tar.gz
+ try:
+                with tarfile.open(built_file, "r:gz") as t1, tarfile.open(svn_file, "r:gz") as t2:
+                    m1 = {m.name: m for m in t1.getmembers()}
+                    m2 = {m.name: m for m in t2.getmembers()}
+                    only_in_built = set(m1) - set(m2)
+                    only_in_svn = set(m2) - set(m1)
+                    if only_in_built:
+                        diff_details.append(f"Only in built: {', '.join(sorted(only_in_built)[:5])}")
+                    if only_in_svn:
+                        diff_details.append(f"Only in SVN: {', '.join(sorted(only_in_svn)[:5])}")
+
+ # First pass: compare sizes (fast, metadata only)
+ common_names = set(m1.keys()) & set(m2.keys())
+ size_mismatches = []
+ for name in common_names:
+ if m1[name].size != m2[name].size:
+ size_mismatches.append(name)
+ elif m1[name].issym() and m2[name].issym():
+ if m1[name].linkname != m2[name].linkname:
+ diff_details.append(f"Symlink differs: {name}")
+ elif m1[name].isdir() != m2[name].isdir():
+ diff_details.append(f"Type differs: {name}")
+
+ if size_mismatches:
+ for name in size_mismatches[:10]:
+                            diff_details.append(f"Size differs: {name} ({m1[name].size} vs {m2[name].size})")
+
+                    # If file lists and sizes all match, archives are equivalent
+ return (not diff_details, diff_details)
+ except Exception as e:
+ return (False, [f"Error: {e}"])
+
+ return (False, ["Unknown archive type"])
+
+ def validate_reproducible_build(self):
+ """Build packages from source using git checkout and compare with SVN
artifacts."""
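+        # Outline: refuse to run on a dirty working tree, remember the current
+        # branch or HEAD, check out the release tag, rebuild the distributions,
+        # compare each one byte-for-byte (falling back to archive-content
+        # comparison) with its SVN counterpart, then restore the original checkout.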
+ console_print("\n[bold]Reproducible Build Verification[/bold]")
+ start_time = time.time()
+
+ tag = self.version
+ repo_root = self.airflow_repo_root
+
+ # Check for uncommitted changes
+ status_result = run_command(
+ ["git", "status", "--porcelain"],
+ cwd=str(repo_root),
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+ if status_result.stdout.strip():
+ return ValidationResult(
+ check_type=CheckType.REPRODUCIBLE_BUILD,
+ passed=False,
+ message="Repository has uncommitted changes",
+                details=["Please commit or stash changes before running the reproducible build check."],
+ duration_seconds=time.time() - start_time,
+ )
+
+ # Save current branch name (if on a branch) or HEAD commit
+ branch_result = run_command(
+ ["git", "rev-parse", "--abbrev-ref", "HEAD"],
+ cwd=str(repo_root),
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+        original_branch = branch_result.stdout.strip() if branch_result.returncode == 0 else None
+
+ # Save current HEAD to restore later
+ head_result = run_command(
+ ["git", "rev-parse", "HEAD"],
+ cwd=str(repo_root),
+ capture_output=True,
+ text=True,
+ check=False,
+ )
+ if head_result.returncode != 0:
+ return ValidationResult(
+ check_type=CheckType.REPRODUCIBLE_BUILD,
+ passed=False,
+ message="Failed to get current HEAD",
+ duration_seconds=time.time() - start_time,
+ )
+ original_head = head_result.stdout.strip()
+
+ # Determine what to display and restore to
+ if original_branch and original_branch != "HEAD":
+ original_ref = original_branch
+ original_display = f"branch '{original_branch}'"
+ else:
+ original_ref = original_head
+ original_display = f"commit {original_head[:12]}"
+
+ # Warn user about branch switch
+ console_print(
+            f"[yellow]WARNING: This check will temporarily switch from {original_display} "
+            f"to tag '{tag}' and should automatically switch back afterwards.[/yellow]"
+ )
+
+ console_print(f"Checking out tag: {tag}")
+ checkout_result = run_command(
+ ["git", "checkout", tag],
+ cwd=str(repo_root),
+ check=False,
+ )
+ if checkout_result.returncode != 0:
+ return ValidationResult(
+ check_type=CheckType.REPRODUCIBLE_BUILD,
+ passed=False,
+ message=f"Failed to checkout tag {tag}",
+                details=["Hint: Make sure the tag exists. Run 'git fetch --tags' to update."],
+ duration_seconds=time.time() - start_time,
+ )
+
+ # Initialize result variables
+ differences = []
+ verified_count = 0
+ missing_from_svn = []
+ build_failed = False
+
+ try:
+            # Clean dist directory (as per manual release process: rm -rf dist/*)
+ dist_dir = repo_root / "dist"
+ if dist_dir.exists():
+ console_print("Cleaning dist directory...")
+ shutil.rmtree(dist_dir)
+
+            # NOTE: git clean commented out - it removes .venv and other important files
+ # The Docker-based build should handle this in isolation anyway
+ # console_print("Cleaning untracked files (git clean -fdx)...")
+ # run_command(
+ # ["git", "clean", "-fdx"],
+ # cwd=str(repo_root),
+ # check=False,
+ # )
+
+ # Build packages using breeze from the checked-out tag
+ console_print("Building packages from source...")
+ if not self.build_packages():
+ build_failed = True
+ else:
+ # Compare built packages with SVN
+ dist_dir = repo_root / "dist"
+
+ for pattern in ["*.tar.gz", "*.whl"]:
+ for built_file in dist_dir.glob(pattern):
+ svn_dir = (
+ self.get_task_sdk_svn_directory()
+ if "task_sdk" in built_file.name
+ else self.get_svn_directory()
+ )
+ svn_file = svn_dir / built_file.name
+ if svn_file.exists():
+                            console_print(f"Verifying {built_file.name}...", end=" ")
+                            # Default to binary comparison
+                            if filecmp.cmp(built_file, svn_file, shallow=False):
+                                verified_count += 1
+                                console_print("[green]OK[/green]")
+                            else:
+                                # Compare archive contents
+                                matches, diff_details = self._compare_archives(built_file, svn_file)
+                                if matches:
+                                    verified_count += 1
+                                    console_print("[green]OK (content match)[/green]")
+                                else:
+                                    differences.append(built_file.name)
+                                    console_print("[red]MISMATCH[/red]")
+                                    for detail in diff_details[:10]:
+                                        console_print(f"  {detail}")
+                                    if len(diff_details) > 10:
+                                        console_print(f"  ... and {len(diff_details) - 10} more differences")
+
+                        else:
+                            missing_from_svn.append(built_file.name)
+                            console_print(
+                                f"[yellow]Note: {built_file.name} not in SVN (may be expected)[/yellow]"
+                            )
+ finally:
+            # Always restore original branch/HEAD, regardless of success or failure
+ console_print(f"Restoring to {original_display}...")
+ restore_result = run_command(
+ ["git", "checkout", original_ref],
+ cwd=str(repo_root),
+ check=False,
+ )
+ if restore_result.returncode == 0:
+                console_print(f"[green]Successfully restored to {original_display}[/green]")
+ else:
+ console_print(
+ f"[red]WARNING: Failed to restore to {original_display}. "
+ f"Please manually run: git checkout {original_ref}[/red]"
+ )
+
+ # Return result after restoring HEAD
+ if build_failed:
+ result = ValidationResult(
+ check_type=CheckType.REPRODUCIBLE_BUILD,
+ passed=False,
+ message="Failed to build packages",
+ duration_seconds=time.time() - start_time,
+ )
+ self._print_result(result)
+ return result
+
+ if not differences:
+ message = f"All {verified_count} packages are identical to SVN"
+ else:
+ message = f"{len(differences)} packages differ from SVN"
+
+ details = None
+ if differences:
+ details = differences[:]
+ if missing_from_svn and self.verbose:
+ details = details or []
+            details.append(f"Note: {len(missing_from_svn)} built packages not in SVN (may be expected)")
+
+ result = ValidationResult(
+ check_type=CheckType.REPRODUCIBLE_BUILD,
+ passed=not differences,
+ message=message,
+ details=details,
+ duration_seconds=time.time() - start_time,
+ )
+ self._print_result(result)
+ return result
+
+ def validate_licenses(self):
+ """Run Apache RAT license check on source tarball."""
+ console_print("\n[bold]Apache RAT License Verification[/bold]")
+ start_time = time.time()
+
+ source_dir = Path("/tmp/apache-airflow-src")
+
+ # Download Apache RAT with checksum verification
+ rat_jar = self._download_apache_rat()
+ if not rat_jar:
+ return ValidationResult(
+ check_type=CheckType.LICENSES,
+ passed=False,
+ message="Failed to download or verify Apache RAT",
+ duration_seconds=time.time() - start_time,
+ )
+
+        source_tarball = self.get_svn_directory() / f"apache_airflow-{self.version_without_rc}-source.tar.gz"
+ if not source_tarball.exists():
+ return ValidationResult(
+ check_type=CheckType.LICENSES,
+ passed=False,
+ message=f"Source tarball not found: {source_tarball}",
+ duration_seconds=time.time() - start_time,
+ )
+
+ console_print(f"Extracting source to {source_dir}...")
+ if source_dir.exists():
+ shutil.rmtree(source_dir)
+ source_dir.mkdir(parents=True)
+
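+        # The tarball contains a single top-level directory; strip that first
+        # path component so the sources land directly in source_dir.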
+ with tarfile.open(source_tarball, "r:gz") as tar:
+ for member in tar.getmembers():
+ member.name = "/".join(member.name.split("/")[1:])
+ if member.name:
+ tar.extract(member, source_dir, filter="data")
+
+ rat_excludes = source_dir / ".rat-excludes"
+ console_print("Running Apache RAT...")
+ result = run_command(
+ [
+ "java",
+ "-jar",
+ str(rat_jar),
+ "--input-exclude-file",
+ str(rat_excludes) if rat_excludes.exists() else "/dev/null",
+ str(source_dir),
+ ],
+ check=False,
+ capture_output=True,
+ text=True,
+ )
+
+        error_lines = [line.strip() for line in result.stdout.split("\n") if line.strip().startswith("!")]
+ unapproved = unknown = 0
+
+ for line in result.stdout.split("\n"):
+ if "Unapproved:" in line:
+ try:
+ unapproved = int(line.split("Unapproved:")[1].split()[0])
+ except (IndexError, ValueError):
+ pass
+ if "Unknown:" in line:
+ try:
+ unknown = int(line.split("Unknown:")[1].split()[0])
+ except (IndexError, ValueError):
+ pass
+
+ details = []
+ if error_lines:
+ details.append(f"Found {len(error_lines)} license issues:")
+ details.extend(error_lines[:10])
+ if len(error_lines) > 10:
+ details.append(f"... and {len(error_lines) - 10} more")
+ if unapproved > 0:
+ details.append(f"Unapproved licenses: {unapproved}")
+ if unknown > 0:
+ details.append(f"Unknown licenses: {unknown}")
+
+ # Show verbose RAT output if requested
+ if self.verbose:
+ separator_count = 0
+ for line in result.stdout.splitlines():
+ if line.strip().startswith("**********"):
+ separator_count += 1
+ if separator_count >= 3:
+ break
+ console_print(line)
+
+ # Clean up extracted source directory (~500MB)
+ if source_dir.exists():
+ shutil.rmtree(source_dir)
+
+ passed = not error_lines and unapproved == 0 and unknown == 0
+ message = (
+ "All files have approved licenses"
+ if passed
+            else f"Found {len(error_lines)} issues, {unapproved} unapproved, {unknown} unknown"
+ )
+
+ result = ValidationResult(
+ check_type=CheckType.LICENSES,
+ passed=passed,
+ message=message,
+ details=details or None,
+ duration_seconds=time.time() - start_time,
+ )
+ self._print_result(result)
+ return result
+
+ def build_packages(self) -> bool:
+ """Build Airflow distributions and source tarball."""
+ console_print("Building Airflow distributions...")
+
+ # Use breeze from the current checkout
+ base_cmd = ["breeze"]
+
+ result = run_command(
+ base_cmd
+ + [
+ "release-management",
+ "prepare-airflow-distributions",
+ "--distribution-format",
+ "both",
+ ],
+ cwd=str(self.airflow_repo_root),
+ check=False,
+ capture_output=True,
+ text=True,
+ )
+ if result.returncode != 0:
+ console_print("[red]Failed to build Airflow distributions[/red]")
+ if result.stdout:
+                console_print(f"[yellow]STDOUT:[/yellow]\n{result.stdout[-2000:]}")
+ if result.stderr:
+                console_print(f"[yellow]STDERR:[/yellow]\n{result.stderr[-2000:]}")
+ return False
+
+ console_print("Building Task SDK distributions...")
+ result = run_command(
+ base_cmd
+ + [
+ "release-management",
+ "prepare-task-sdk-distributions",
+ "--distribution-format",
+ "both",
+ ],
+ cwd=str(self.airflow_repo_root),
+ check=False,
+ capture_output=True,
+ text=True,
+ )
+ if result.returncode != 0:
+ console_print("[red]Failed to build Task SDK distributions[/red]")
+ if result.stdout:
+                console_print(f"[yellow]STDOUT:[/yellow]\n{result.stdout[-2000:]}")
+ if result.stderr:
+                console_print(f"[yellow]STDERR:[/yellow]\n{result.stderr[-2000:]}")
+ return False
+
+ console_print("Building source tarball...")
+ cmd = base_cmd + [
+ "release-management",
+ "prepare-tarball",
+ "--tarball-type",
+ "apache_airflow",
+ "--version",
+ self.version_without_rc,
+ ]
+ if version_suffix := self._get_version_suffix():
+ cmd.extend(["--version-suffix", version_suffix])
+
+ result = run_command(
+            cmd, cwd=str(self.airflow_repo_root), check=False, capture_output=True, text=True
+ )
+ if result.returncode != 0:
+ console_print("[red]Failed to build source tarball[/red]")
+ if result.stdout:
+                console_print(f"[yellow]STDOUT:[/yellow]\n{result.stdout[-2000:]}")
+ if result.stderr:
+                console_print(f"[yellow]STDERR:[/yellow]\n{result.stderr[-2000:]}")
+ return False
+
+ console_print("[green]All packages built successfully[/green]")
+ return True
diff --git a/dev/breeze/src/airflow_breeze/utils/release_validator.py
b/dev/breeze/src/airflow_breeze/utils/release_validator.py
new file mode 100644
index 00000000000..a28ce5bf996
--- /dev/null
+++ b/dev/breeze/src/airflow_breeze/utils/release_validator.py
@@ -0,0 +1,624 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import re
+import shutil
+import subprocess
+import tempfile
+import time
+from abc import ABC, abstractmethod
+from collections.abc import Callable
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+
+from airflow_breeze.utils.console import console_print
+from airflow_breeze.utils.run_utils import run_command
+
+
+class CheckType(str, Enum):
+ SVN = "svn"
+ REPRODUCIBLE_BUILD = "reproducible-build"
+ SIGNATURES = "signatures"
+ CHECKSUMS = "checksums"
+ LICENSES = "licenses"
+
+
+@dataclass
+class ValidationResult:
+ check_type: CheckType
+ passed: bool
+ message: str
+ details: list[str] | None = None
+ duration_seconds: float | None = None
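+
+    # For example: ValidationResult(CheckType.SVN, passed=True,
+    # message="All 15 expected files present").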
+
+
+class ReleaseValidator(ABC):
+ """Base class for release validators with common functionality for PMC
verification."""
+
+ APACHE_RAT_JAR_DOWNLOAD_URL = (
+        "https://downloads.apache.org/creadur/apache-rat-0.17/apache-rat-0.17-bin.tar.gz"
+ )
+ APACHE_RAT_JAR_SHA512_DOWNLOAD_URL = (
+        "https://downloads.apache.org/creadur/apache-rat-0.17/apache-rat-0.17-bin.tar.gz.sha512"
+ )
+ GPG_KEYS_URL = "https://dist.apache.org/repos/dist/release/airflow/KEYS"
+
+ def __init__(
+ self,
+ version: str,
+ svn_path: Path,
+ airflow_repo_root: Path,
+ download_gpg_keys: bool = False,
+ update_svn: bool = True,
+ verbose: bool = False,
+ ):
+ self.version = version
+ self.svn_path = svn_path
+ self.airflow_repo_root = airflow_repo_root
+ self.download_gpg_keys = download_gpg_keys
+ self.update_svn = update_svn
+ self.verbose = verbose
+ self.results: list[ValidationResult] = []
+
+ @abstractmethod
+ def get_distribution_name(self) -> str:
+ pass
+
+ @abstractmethod
+ def get_svn_directory(self) -> Path:
+ pass
+
+ @abstractmethod
+ def get_expected_files(self) -> list[str]:
+ pass
+
+ @abstractmethod
+    def build_packages(self) -> bool:
+ pass
+
+ @abstractmethod
+ def validate_svn_files(self) -> ValidationResult:
+ pass
+
+ @abstractmethod
+ def validate_reproducible_build(self) -> ValidationResult:
+ pass
+
+ @abstractmethod
+ def validate_licenses(self) -> ValidationResult:
+ pass
+
+ def get_svn_directories(self) -> list[Path]:
+ """Return list of SVN directories to validate. Override for
multi-directory validation."""
+ return [self.get_svn_directory()]
+
+ def validate_signatures(self) -> ValidationResult:
+ """Verify GPG signatures for all .asc files."""
+ console_print("\n[bold]GPG Signature Verification[/bold]")
+ start_time = time.time()
+
+ asc_files: list[Path] = []
+ for svn_dir in self.get_svn_directories():
+ if svn_dir.exists():
+ asc_files.extend(svn_dir.glob("*.asc"))
+
+ if not asc_files:
+ return ValidationResult(
+ check_type=CheckType.SIGNATURES,
+ passed=False,
+ message="No .asc files found",
+ duration_seconds=time.time() - start_time,
+ )
+
+ failed = []
+ for asc_file in asc_files:
+ result = run_command(
+                ["gpg", "--verify", str(asc_file)], check=False, capture_output=True, text=True
+ )
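+            # gpg prints "Good signature from ..." on stderr for a valid
+            # signature; a non-zero exit code means verification failed.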
+ if result.returncode != 0:
+ failed.append(asc_file.name)
+ elif self.verbose:
+ # Extract signer from GPG output
+                match = re.search(r"Good signature from \"(.*)\"", result.stderr)
+                signer = match.group(1) if match else "Unknown"
+                console_print(f"  {asc_file.name}: Valid signature from {signer}")
+
+ message = (
+ f"All {len(asc_files)} signatures verified"
+ if not failed
+ else f"{len(failed)} of {len(asc_files)} signatures failed"
+ )
+
+ details = failed[:] if failed else None
+ if failed:
+ details = details or []
+ details.append(
+                "Hint: If signatures failed due to missing keys, try running with --download-gpg-keys"
+ )
+ details.append(f"or download manually from {self.GPG_KEYS_URL}")
+
+ result = ValidationResult(
+ check_type=CheckType.SIGNATURES,
+ passed=not failed,
+ message=message,
+ details=details,
+ duration_seconds=time.time() - start_time,
+ )
+ self._print_result(result)
+ return result
+
+ def validate_checksums(self) -> ValidationResult:
+ """Verify SHA512 checksums for all .sha512 files."""
+ console_print("\n[bold]SHA512 Checksum Verification[/bold]")
+ start_time = time.time()
+
+ sha512_files: list[Path] = []
+ for svn_dir in self.get_svn_directories():
+ if svn_dir.exists():
+ sha512_files.extend(svn_dir.glob("*.sha512"))
+
+ if not sha512_files:
+ return ValidationResult(
+ check_type=CheckType.CHECKSUMS,
+ passed=False,
+ message="No .sha512 files found",
+ duration_seconds=time.time() - start_time,
+ )
+
+ failed = []
+ for sha_file in sha512_files:
+ expected = sha_file.read_text().split()[0]
+            target_file = sha_file.parent / sha_file.name.replace(".sha512", "")
+
+ if not target_file.exists():
+ failed.append(f"{sha_file.name} (target file missing)")
+ continue
+
+ result = run_command(
+                ["shasum", "-a", "512", str(target_file)], check=False, capture_output=True, text=True
+ )
+ if result.returncode != 0 or result.stdout.split()[0] != expected:
+ failed.append(sha_file.name)
+ elif self.verbose:
+ console_print(f" {sha_file.name}: OK")
+
+ message = (
+ f"All {len(sha512_files)} checksums valid"
+ if not failed
+ else f"{len(failed)} of {len(sha512_files)} checksums failed"
+ )
+
+ result = ValidationResult(
+ check_type=CheckType.CHECKSUMS,
+ passed=not failed,
+ message=message,
+ details=failed or None,
+ duration_seconds=time.time() - start_time,
+ )
+ self._print_result(result)
+ return result
+
+ @property
+ def check_methods(self) -> dict[CheckType, Callable]:
+ return {
+ CheckType.SVN: self.validate_svn_files,
+ CheckType.REPRODUCIBLE_BUILD: self.validate_reproducible_build,
+ CheckType.SIGNATURES: self.validate_signatures,
+ CheckType.CHECKSUMS: self.validate_checksums,
+ CheckType.LICENSES: self.validate_licenses,
+ }
+
+ @property
+ def all_check_types(self) -> list[CheckType]:
+ """Return all available check types in order.
+
+ Order matches README_RELEASE_AIRFLOW.md section order for PMC
verification:
+ 1. Reproducible build - Build from source and compare with SVN
artifacts
+ 2. SVN - Verify expected files exist in SVN
+ 3. Licenses - Apache RAT license verification
+ 4. Signatures - GPG signature verification
+ 5. Checksums - SHA512 checksum verification
+
+ Note: Tests are independent and can run in any order.
+ """
+ return [
+ CheckType.REPRODUCIBLE_BUILD,
+ CheckType.SVN,
+ CheckType.LICENSES,
+ CheckType.SIGNATURES,
+ CheckType.CHECKSUMS,
+ ]
+
+    def _get_prerequisites_for_checks(self, checks: list[CheckType]) -> dict[str, list[CheckType]]:
+ """Return mapping of prerequisite -> list of checks that require it."""
+ # Define which checks require which prerequisites
+ prereq_map = {
+ "java": [CheckType.LICENSES], # Apache RAT requires Java
+ "gpg": [CheckType.SIGNATURES], # GPG signature verification
+ "svn": list(CheckType), # All checks need SVN files
+ "docker": [CheckType.REPRODUCIBLE_BUILD], # Docker builds
+ "hatch": [CheckType.REPRODUCIBLE_BUILD], # Package builds
+            "clean_git": [CheckType.REPRODUCIBLE_BUILD],  # No uncommitted changes
+ }
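+
+        # For example, checks=[CheckType.LICENSES, CheckType.CHECKSUMS] yields
+        # {"java": [LICENSES], "svn": [LICENSES, CHECKSUMS]}.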
+
+ # Filter to only prerequisites needed for the selected checks
+ needed: dict[str, list[CheckType]] = {}
+ for prereq, required_by in prereq_map.items():
+ matching = [c for c in checks if c in required_by]
+ if matching:
+ needed[prereq] = matching
+ return needed
+
+    def validate_prerequisites(self, checks: list[CheckType] | None = None) -> bool:
+ """Verify prerequisites based on which checks will be run."""
+ if checks is None:
+ checks = self.all_check_types
+
+ console_print("\n[bold]Prerequisites Verification[/bold]")
+ failed: list[str] = []
+ warnings: list[str] = []
+
+ needed_prereqs = self._get_prerequisites_for_checks(checks)
+
+ # Check Java (required for Apache RAT / license checks)
+ if "java" in needed_prereqs:
+ java_path = shutil.which("java")
+ if not java_path:
+                failed.append("Java is not installed (required for Apache RAT)")
+ elif self.verbose:
+ console_print(f" [green]✓[/green] Java: {java_path}")
+
+ # Check GPG (required for signature verification)
+ if "gpg" in needed_prereqs:
+ gpg_path = shutil.which("gpg")
+ if not gpg_path:
+                failed.append("GPG is not installed (required for signature verification)")
+ elif self.verbose:
+ console_print(f" [green]✓[/green] GPG: {gpg_path}")
+
+ # Check SVN (required for release verification)
+ if "svn" in needed_prereqs:
+ svn_path = shutil.which("svn")
+ if not svn_path:
+                failed.append("SVN is not installed (required for release verification)")
+ elif self.verbose:
+ console_print(f" [green]✓[/green] SVN: {svn_path}")
+
+ # Check Docker (required for reproducible builds)
+ if "docker" in needed_prereqs:
+ docker_path = shutil.which("docker")
+ if not docker_path:
+                failed.append("Docker is not installed (required for reproducible builds)")
+ else:
+ # Check if Docker daemon is running
+ result = run_command(
+ ["docker", "info"],
+ check=False,
+ capture_output=True,
+ )
+ if result.returncode != 0:
+                    failed.append("Docker is installed but not running (start Docker daemon)")
+                elif self.verbose:
+                    console_print(f"  [green]✓[/green] Docker: {docker_path} (daemon running)")
+
+ # Check hatch (required for local package builds)
+ if "hatch" in needed_prereqs:
+ hatch_path = shutil.which("hatch")
+ if not hatch_path:
+ failed.append(
+                    "hatch is not installed (required for reproducible builds, install with: uv tool install hatch)"
+ )
+ elif self.verbose:
+ console_print(f" [green]✓[/green] hatch: {hatch_path}")
+
+        # Check for clean git working directory (required for reproducible builds)
+ if "clean_git" in needed_prereqs:
+ if not self._check_clean_git_working_directory():
+ failed.append(
+ "Git working directory has uncommitted or staged changes "
+                    "(reproducible build requires a clean checkout to switch tags)"
+ )
+ elif self.verbose:
+                console_print("  [green]✓[/green] Git: working directory clean")
+
+ # Optionally download GPG keys
+ if self.download_gpg_keys:
+ self._download_gpg_keys()
+
+ if warnings:
+ console_print("[yellow]Warnings:[/yellow]")
+ for w in warnings:
+ console_print(f" - {w}")
+
+ if failed:
+ console_print("[red]Prerequisites failed:[/red]")
+ for f in failed:
+ console_print(f" - {f}")
+            console_print("[yellow]Please install missing prerequisites and try again.[/yellow]")
+ return False
+
+ # Optionally update SVN checkout
+ if self.update_svn:
+ if not self._update_svn():
+ return False
+ else:
+            console_print("[yellow]SVN update skipped. The local revision might not be the latest.[/yellow]")
+
+ # Check that release files exist in the SVN directory
+ if not self._verify_release_files_exist():
+ return False
+
+ console_print("[green]All required prerequisites met[/green]")
+ return True
+
+ def _download_gpg_keys(self) -> None:
+ """Download GPG keys from ASF."""
+ console_print("Downloading GPG keys from ASF...")
+ with tempfile.NamedTemporaryFile() as tmp_keys:
+            run_command(["wget", "-qO", tmp_keys.name, self.GPG_KEYS_URL], check=True)
+            run_command(["gpg", "--import", tmp_keys.name], check=True, capture_output=True)
+ console_print("[green]GPG keys downloaded and imported[/green]")
+
+ def _check_svn_locks(self, svn_dir: Path) -> bool:
+ """Check if SVN working copy is locked."""
+        # svn status shows 'L' in the third column for locked items
+ result = run_command(
+ ["svn", "status", str(svn_dir)],
+ check=False,
+ capture_output=True,
+ text=True,
+ )
+ # Check for lock indicator in output (L in column 3) or E155037 error
+ if "E155037" in result.stderr:
+ return True
+ for line in result.stdout.splitlines():
+            # SVN status format: columns are [item status][props][lock][history][switched][info][conflict]
+ # Lock is in column 3 (index 2), shown as 'L'
+ if len(line) > 2 and line[2] == "L":
+ return True
+ return False
+
+ def _check_clean_git_working_directory(self) -> bool:
+ """Check if git working directory is clean (no uncommitted or staged
changes)."""
+ result = run_command(
+            ["git", "-C", str(self.airflow_repo_root), "status", "--porcelain"],
+ check=False,
+ capture_output=True,
+ text=True,
+ )
+ if result.returncode != 0:
+ return False
+ # If output is empty, working directory is clean
+ return not result.stdout.strip()
+
+ def _update_svn(self) -> bool:
+ """Update SVN checkout to ensure we have the latest release files."""
+ # Update only the specific directories needed, not the entire SVN tree
+ svn_dirs = self.get_svn_directories()
+
+ for svn_dir in svn_dirs:
+ # Check for SVN locks before attempting update (prevents hanging)
+ if self._check_svn_locks(svn_dir.parent):
+                console_print(f"[red]SVN working copy is locked: {svn_dir.parent}[/red]")
+                console_print(
+                    "\n[yellow]Hint: Run the following to release SVN locks:[/yellow]\n"
+                    f"  svn cleanup {svn_dir.parent}\n"
+                    "\n[yellow]Or skip SVN update with --no-update-svn if files are already up to date.[/yellow]"
+                )
+ return False
+
+ console_print(f"Updating SVN checkout: {svn_dir}...")
+
+ result = run_command(
+ ["svn", "update", "--set-depth=infinity", str(svn_dir)],
+ check=False,
+ capture_output=True,
+ text=True,
+ )
+
+ if result.returncode != 0:
+ console_print("[red]Failed to update SVN checkout[/red]")
+ if result.stderr:
+ console_print(f"[red]{result.stderr.strip()}[/red]")
+                console_print(
+                    "[yellow]Hint: Make sure you have checked out the SVN repository:[/yellow]\n"
+                    "  svn checkout --depth=immediates https://dist.apache.org/repos/dist asf-dist\n"
+                    "  svn update --set-depth=infinity asf-dist/dev/airflow"
+                )
+ return False
+
+ console_print("[green]SVN checkout updated[/green]")
+ return True
+
+ def _verify_release_files_exist(self) -> bool:
+ """Verify that the SVN directories contain release files."""
+ for svn_dir in self.get_svn_directories():
+ if not svn_dir.exists():
+                console_print(f"[red]SVN directory does not exist: {svn_dir}[/red]")
+                console_print(
+                    "[yellow]Hint: Make sure the version is correct and SVN is checked out.[/yellow]\n"
+                    "  You may need to run with --update-svn to fetch the latest files."
+                )
+ return False
+
+ # Check for release artifacts (.tar.gz or .whl files)
+            release_files = list(svn_dir.glob("*.tar.gz")) + list(svn_dir.glob("*.whl"))
+ if not release_files:
+                console_print(f"[red]No release files found in: {svn_dir}[/red]")
+                console_print(
+                    "[yellow]The directory exists but contains no release artifacts.\n"
+                    "This may happen if:\n"
+                    "  - The release was already published and files were moved to the release folder\n"
+                    "  - The SVN checkout is out of date\n"
+                    "  - The version is incorrect\n\n"
+                    "Hint: Try running with --update-svn to fetch the latest files.[/yellow]"
+                )
+ return False
+
+ if self.verbose:
+                console_print(f"  [green]✓[/green] Found {len(release_files)} release files in {svn_dir}")
+
+ return True
+
+ def _download_apache_rat(self) -> Path | None:
+ """Download and verify Apache RAT jar.
+
+        Returns the path to the jar file, or None if download/verification failed.
+ """
+ rat_jar = Path("/tmp/apache-rat-0.17/apache-rat-0.17.jar")
+
+ if rat_jar.exists():
+ console_print("[green]Apache RAT already present[/green]")
+ return rat_jar
+
+ console_print("Downloading Apache RAT...")
+ rat_tarball = Path("/tmp/apache-rat-0.17-bin.tar.gz")
+ rat_sha512 = Path("/tmp/apache-rat-0.17-bin.tar.gz.sha512")
+
+ # Download tarball
+ wget_result = run_command(
+            ["wget", "-qO", str(rat_tarball), self.APACHE_RAT_JAR_DOWNLOAD_URL],
+ check=False,
+ capture_output=True,
+ )
+ if wget_result.returncode != 0:
+ console_print("[red]Failed to download Apache RAT[/red]")
+ return None
+
+ # Download and verify checksum
+ console_print("Verifying Apache RAT Checksum...")
+ sha_download = run_command(
+            ["wget", "-qO", str(rat_sha512), self.APACHE_RAT_JAR_SHA512_DOWNLOAD_URL],
+ check=False,
+ capture_output=True,
+ )
+ if sha_download.returncode != 0:
+ console_print("[red]Failed to download Apache RAT checksum[/red]")
+ return None
+
+        sha_result = run_command(["shasum", "-a", "512", str(rat_tarball)], capture_output=True, text=True)
+ calculated_sha = sha_result.stdout.split()[0]
+ expected_sha = rat_sha512.read_text().split()[0]
+
+ if calculated_sha != expected_sha:
+            console_print("[red]Apache RAT checksum verification failed![/red]")
+ console_print(f" Expected: {expected_sha[:32]}...")
+ console_print(f" Got: {calculated_sha[:32]}...")
+ return None
+
+ # Extract
+        subprocess.run(["tar", "-C", "/tmp", "-xzf", str(rat_tarball)], check=True)
+ console_print("[green]Apache RAT downloaded and verified[/green]")
+
+ return rat_jar
+
+ def validate(self, checks: list[CheckType] | None = None) -> bool:
+ """Run validation checks. Override to add prerequisites."""
+ if checks is None:
+ checks = self.all_check_types
+
+ if not self.validate_prerequisites(checks):
+ return False
+
+ return self._run_checks(checks)
+
+ def _run_checks(self, checks: list[CheckType] | None = None) -> bool:
+ """Internal method to run the actual validation checks."""
+ if checks is None:
+ checks = self.all_check_types
+
+ self.checks_run = checks # Track which checks were actually run
+
+ console_print(f"\n[bold cyan]Validating {self.get_distribution_name()}
{self.version}[/bold cyan]")
+ console_print(f"SVN Path: {self.svn_path}")
+ console_print(f"Airflow Root: {self.airflow_repo_root}")
+
+ for check_type in checks:
+ if check_type in self.check_methods:
+ result = self.check_methods[check_type]()
+ self.results.append(result)
+
+ self._print_summary()
+ return all(r.passed for r in self.results)
+
+ def _print_result(self, result: ValidationResult):
+        status = "[green]PASSED[/green]" if result.passed else "[red]FAILED[/red]"
+ console_print(f"Status: {status} - {result.message}")
+
+ if result.details:
+ for detail in result.details:
+ console_print(f" {detail}")
+
+ if result.duration_seconds:
+ console_print(f"Duration: {result.duration_seconds:.1f}s")
+
+ def _print_summary(self):
+ console_print("\n" + "=" * 70)
+ passed_count = sum(1 for r in self.results if r.passed)
+ total_count = len(self.results)
+
+ # Check if we ran all available checks
+ all_checks = set(self.all_check_types)
+ checks_run = set(getattr(self, "checks_run", all_checks))
+ skipped_checks = all_checks - checks_run
+
+ if passed_count == total_count:
+            console_print(f"[bold green]ALL CHECKS PASSED ({passed_count}/{total_count})[/bold green]")
+
+ console_print("\nPassed checks:")
+ for result in self.results:
+                console_print(f"  - {result.check_type.value}: {result.message}")
+
+ if skipped_checks:
+ console_print(
+                    f"\n[yellow]Note: Only {total_count} of {len(all_checks)} checks were run.[/yellow]"
+ )
+ console_print("Skipped checks:")
+ for check in sorted(skipped_checks, key=lambda c: c.value):
+ console_print(f" - {check.value}")
+ console_print(
+                    "\n[yellow]You may vote +1 (binding) only if you have verified "
+                    "the skipped checks manually or by running them separately.[/yellow]"
+ )
+ else:
+ console_print("\nYou can vote +1 (binding) on this release.")
+ else:
+ failed_count = total_count - passed_count
+ console_print(
+                f"[bold red]SOME CHECKS FAILED ({failed_count} failed, {passed_count} passed)[/bold red]"
+ )
+ console_print("\nFailed checks:")
+ for result in self.results:
+ if not result.passed:
+                    console_print(f"  - {result.check_type.value}: {result.message}")
+ console_print("\nPlease review failures above before voting.")
+
+ total_duration = sum(r.duration_seconds or 0 for r in self.results)
+ console_print(f"\nTotal validation time: {total_duration:.1f}s")
+ console_print("=" * 70)
+
+ def _strip_rc_suffix(self, version: str) -> str:
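+        # e.g. "3.1.0rc2" -> "3.1.0"; versions without an rc suffix pass through unchanged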
+ return re.sub(r"rc\d+$", "", version)
+
+ def _get_version_suffix(self) -> str:
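+        """Return the rc suffix of the version (e.g. "rc2" for "3.1.0rc2"), or "" if none."""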
+ if "rc" in self.version:
+ match = re.search(r"(rc\d+)$", self.version)
+ if match:
+ return match.group(1)
+ return ""