This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/sbp by this push:
new 77a47ec Extract archives for comparison with GitHub trees
77a47ec is described below
commit 77a47ec24fc37d7c946aab6c30a8547da30e819e
Author: Sean B. Palmer <[email protected]>
AuthorDate: Thu Feb 5 15:30:50 2026 +0000
Extract archives for comparison with GitHub trees
---
atr/tasks/checks/compare.py | 54 +++++++++++++++++
tests/unit/test_checks_compare.py | 123 ++++++++++++++++++++++++++++++++++++++
2 files changed, 177 insertions(+)
diff --git a/atr/tasks/checks/compare.py b/atr/tasks/checks/compare.py
index 8f1464c..d1afe88 100644
--- a/atr/tasks/checks/compare.py
+++ b/atr/tasks/checks/compare.py
@@ -34,13 +34,16 @@ import dulwich.porcelain
import dulwich.refs
import pydantic
+import atr.archives as archives
import atr.attestable as attestable
+import atr.config as config
import atr.log as log
import atr.models.results as results
import atr.sbom.models.github as github_models
import atr.tasks.checks as checks
import atr.util as util
+_CONFIG: Final = config.get()
_DEFAULT_EMAIL: Final[str] = "atr@localhost"
_DEFAULT_USER: Final[str] = "atr"
@@ -72,13 +75,28 @@ async def source_trees(args: checks.FunctionArguments) ->
results.Results | None
payload = await _load_tp_payload(args.project_name, args.version_name,
args.revision_number)
checkout_dir: str | None = None
+ archive_dir: str | None = None
if payload is not None:
+ if not (primary_abs_path := await recorder.abs_path()):
+ return None
+ max_extract_size = args.extra_args.get("max_extract_size",
_CONFIG.MAX_EXTRACT_SIZE)
+ chunk_size = args.extra_args.get("chunk_size",
_CONFIG.EXTRACT_CHUNK_SIZE)
tmp_dir = util.get_tmp_dir()
await aiofiles.os.makedirs(tmp_dir, exist_ok=True)
async with util.async_temporary_directory(prefix="trees-",
dir=tmp_dir) as temp_dir:
github_dir = temp_dir / "github"
+ archive_dir_path = temp_dir / "archive"
await aiofiles.os.makedirs(github_dir, exist_ok=True)
+ await aiofiles.os.makedirs(archive_dir_path, exist_ok=True)
checkout_dir = await _checkout_github_source(payload, github_dir)
+ if await _decompress_archive(primary_abs_path, archive_dir_path,
max_extract_size, chunk_size):
+ archive_dir = str(archive_dir_path)
+ else:
+ await recorder.failure(
+ "Failed to extract source archive for comparison",
+ {"archive_path": str(primary_abs_path), "extract_dir":
str(archive_dir_path)},
+ )
+ return None
payload_summary = _payload_summary(payload)
log.info(
"Ran compare.source_trees successfully",
@@ -88,6 +106,7 @@ async def source_trees(args: checks.FunctionArguments) ->
results.Results | None
path=args.primary_rel_path,
github_payload=payload_summary,
github_checkout=checkout_dir,
+ archive_checkout=archive_dir,
)
return None
@@ -147,6 +166,41 @@ def _clone_repo(repo_url: str, sha: str, checkout_dir:
pathlib.Path) -> None:
shutil.rmtree(git_dir)
+async def _decompress_archive(
+ archive_path: pathlib.Path,
+ extract_dir: pathlib.Path,
+ max_extract_size: int,
+ chunk_size: int,
+) -> bool:
+ started_ns = time.perf_counter_ns()
+ try:
+ extracted_size, _extracted_paths = await asyncio.to_thread(
+ archives.extract,
+ str(archive_path),
+ str(extract_dir),
+ max_size=max_extract_size,
+ chunk_size=chunk_size,
+ )
+ except (archives.ExtractionError, OSError):
+ elapsed_ms = (time.perf_counter_ns() - started_ns) / 1_000_000.0
+ log.exception(
+ "Failed to extract source archive for compare.source_trees",
+ archive_path=str(archive_path),
+ extract_dir=str(extract_dir),
+ extract_ms=elapsed_ms,
+ )
+ return False
+ elapsed_ms = (time.perf_counter_ns() - started_ns) / 1_000_000.0
+ log.info(
+ "Extracted source archive for compare.source_trees",
+ archive_path=str(archive_path),
+ extract_dir=str(extract_dir),
+ extracted_bytes=extracted_size,
+ extract_ms=elapsed_ms,
+ )
+ return True
+
+
def _ensure_clone_identity_env() -> None:
os.environ["USER"] = _DEFAULT_USER
os.environ["EMAIL"] = _DEFAULT_EMAIL
diff --git a/tests/unit/test_checks_compare.py
b/tests/unit/test_checks_compare.py
index b57ee97..1ee3c76 100644
--- a/tests/unit/test_checks_compare.py
+++ b/tests/unit/test_checks_compare.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+import datetime
import pathlib
from collections.abc import Callable, Mapping
@@ -23,6 +24,7 @@ import dulwich.objects
import dulwich.refs
import pytest
+import atr.models.sql
import atr.sbom.models.github
import atr.tasks.checks
import atr.tasks.checks.compare
@@ -58,6 +60,29 @@ class CommitStub:
self.tree = tree
+class DecompressRecorder:
+ def __init__(self, return_value: bool = True) -> None:
+ self.archive_path: pathlib.Path | None = None
+ self.extract_dir: pathlib.Path | None = None
+ self.max_extract_size: int | None = None
+ self.chunk_size: int | None = None
+ self.return_value = return_value
+
+ async def __call__(
+ self,
+ archive_path: pathlib.Path,
+ extract_dir: pathlib.Path,
+ max_extract_size: int,
+ chunk_size: int,
+ ) -> bool:
+ self.archive_path = archive_path
+ self.extract_dir = extract_dir
+ self.max_extract_size = max_extract_size
+ self.chunk_size = chunk_size
+ assert await aiofiles.os.path.exists(extract_dir)
+ return self.return_value
+
+
class GitClientStub:
def __init__(self) -> None:
self.closed = False
@@ -105,6 +130,21 @@ class ParseCommitRecorder:
return self.commit
+class ExtractErrorRaiser:
+ def __call__(self, *args: object, **kwargs: object) -> tuple[int,
list[str]]:
+ raise atr.tasks.checks.compare.archives.ExtractionError("Extraction
error")
+
+
+class ExtractRecorder:
+ def __init__(self, extracted_size: int = 123) -> None:
+ self.calls: list[tuple[str, str, int, int]] = []
+ self.extracted_size = extracted_size
+
+ def __call__(self, archive_path: str, extract_dir: str, max_size: int,
chunk_size: int) -> tuple[int, list[str]]:
+ self.calls.append((archive_path, extract_dir, max_size, chunk_size))
+ return self.extracted_size, []
+
+
class PayloadLoader:
def __init__(self, payload: atr.sbom.models.github.TrustedPublisherPayload
| None) -> None:
self.payload = payload
@@ -150,11 +190,29 @@ class RecorderStub(atr.tasks.checks.Recorder):
member_rel_path=None,
afresh=False,
)
+ self.failure_calls: list[tuple[str, object]] = []
self._is_source = is_source
async def primary_path_is_source(self) -> bool:
return self._is_source
+ async def failure(
+ self, message: str, data: object, primary_rel_path: str | None = None,
member_rel_path: str | None = None
+ ) -> atr.models.sql.CheckResult:
+ self.failure_calls.append((message, data))
+ return atr.models.sql.CheckResult(
+ release_name=self.release_name,
+ revision_number=self.revision_number,
+ checker=self.checker,
+ primary_rel_path=primary_rel_path or self.primary_rel_path,
+ member_rel_path=member_rel_path,
+ created=datetime.datetime.now(datetime.UTC),
+ status=atr.models.sql.CheckResultStatus.FAILURE,
+ message=message,
+ data=data,
+ cached=False,
+ )
+
class RepoStub:
def __init__(self, controldir: pathlib.Path, worktree: object) -> None:
@@ -271,6 +329,36 @@ def
test_clone_repo_raises_when_commit_missing(monkeypatch: pytest.MonkeyPatch,
assert git_client.closed is True
+@pytest.mark.asyncio
+async def test_decompress_archive_calls_extract(monkeypatch:
pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None:
+ archive_path = tmp_path / "artifact.tar.gz"
+ extract_dir = tmp_path / "extracted"
+ extract_dir.mkdir()
+ extract_recorder = ExtractRecorder()
+
+ monkeypatch.setattr(atr.tasks.checks.compare.archives, "extract",
extract_recorder)
+
+ result = await atr.tasks.checks.compare._decompress_archive(archive_path,
extract_dir, 10, 20)
+
+ assert result is True
+ assert extract_recorder.calls == [(str(archive_path), str(extract_dir),
10, 20)]
+
+
+@pytest.mark.asyncio
+async def test_decompress_archive_handles_extraction_error(
+ monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
+) -> None:
+ archive_path = tmp_path / "artifact.tar.gz"
+ extract_dir = tmp_path / "extracted"
+ extract_dir.mkdir()
+
+ monkeypatch.setattr(atr.tasks.checks.compare.archives, "extract",
ExtractErrorRaiser())
+
+ result = await atr.tasks.checks.compare._decompress_archive(archive_path,
extract_dir, 10, 20)
+
+ assert result is False
+
+
@pytest.mark.asyncio
async def test_source_trees_creates_temp_workspace_and_cleans_up(
monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
@@ -279,10 +367,12 @@ async def
test_source_trees_creates_temp_workspace_and_cleans_up(
args = _make_args(recorder)
payload = _make_payload()
checkout = CheckoutRecorder()
+ decompress = DecompressRecorder()
tmp_root = tmp_path / "temporary-root"
monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
+ monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir",
ReturnValue(tmp_root))
await atr.tasks.checks.compare.source_trees(args)
@@ -290,6 +380,8 @@ async def
test_source_trees_creates_temp_workspace_and_cleans_up(
assert checkout.checkout_dir is not None
checkout_dir = checkout.checkout_dir
assert checkout_dir.name == "github"
+ assert decompress.extract_dir is not None
+ assert decompress.extract_dir.name == "archive"
assert checkout_dir.parent.parent == tmp_root
assert checkout_dir.parent.name.startswith("trees-")
assert await aiofiles.os.path.exists(tmp_root)
@@ -307,11 +399,42 @@ async def
test_source_trees_payload_none_skips_temp_workspace(monkeypatch: pytes
"_checkout_github_source",
RaiseAsync("_checkout_github_source should not be called"),
)
+ monkeypatch.setattr(
+ atr.tasks.checks.compare,
+ "_decompress_archive",
+ RaiseAsync("_decompress_archive should not be called"),
+ )
monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir",
RaiseSync("get_tmp_dir should not be called"))
await atr.tasks.checks.compare.source_trees(args)
+@pytest.mark.asyncio
+async def test_source_trees_records_failure_when_decompress_fails(
+ monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
+) -> None:
+ recorder = RecorderStub(True)
+ args = _make_args(recorder)
+ payload = _make_payload()
+ checkout = CheckoutRecorder()
+ decompress = DecompressRecorder(return_value=False)
+ tmp_root = tmp_path / "temporary-root"
+
+ monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
+ monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
+ monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
+ monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir",
ReturnValue(tmp_root))
+
+ await atr.tasks.checks.compare.source_trees(args)
+
+ assert len(recorder.failure_calls) == 1
+ message, data = recorder.failure_calls[0]
+ assert message == "Failed to extract source archive for comparison"
+ assert isinstance(data, dict)
+ assert data["archive_path"] == str(await recorder.abs_path())
+ assert data["extract_dir"] == str(decompress.extract_dir)
+
+
@pytest.mark.asyncio
async def test_source_trees_skips_when_not_source(monkeypatch:
pytest.MonkeyPatch) -> None:
recorder = RecorderStub(False)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]