This is an automated email from the ASF dual-hosted git repository.
Yicong-Huang pushed a commit to branch release/v1.1.0-incubating
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/release/v1.1.0-incubating by
this push:
new 4d900340f6 fix(licensing): preserve all versions in check_binary_deps
multi-version diff (#4711)
4d900340f6 is described below
commit 4d900340f66ab2635f222fd7a8191583ac6e9820
Author: Jiadong Bai <[email protected]>
AuthorDate: Sun May 3 07:54:48 2026 +0000
fix(licensing): preserve all versions in check_binary_deps multi-version
diff (#4711)
### What changes were proposed in this PR?
Fixes a latent bug in `bin/licensing/check_binary_deps.py` — the
internal indexers used `dict[name, version]` (one version per name), so
when the same name appeared with two different versions the second
assignment silently overwrote the first. Concretely, this
PR switches all three indexers to the same shape:
```python
_index_npm : dict[str, set[str]] # name -> versions
_index_python : dict[str, set[str]] # name -> versions
_index_jar : dict[str, set[str]] # artifact -> versions
```
`diff_simple` / `diff_jars` are updated to emit per-version `added` /
`stale` and per-name `drift` tuples shaped `(name, sorted_claimed,
sorted_real)`. Multi-version drift renders as
```
~ jetty-server: LICENSE-binary=9.4.20.v20190813, 11.0.20
bundled=9.4.20.v20190813, 11.0.21
```
falling back to the existing single-version form when there's only one
version on each side. As a side benefit, `added` / `stale` lines now
include the version (this regressed in #4693, which printed bare names).
This PR also adds several **unit tests** for the `check_binary_deps.py`
script.
The tests are wired into the `amber` job in `build.yml` right after Python
setup (before any `check_binary_deps.py` invocation):
```yaml
- name: Unit-test licensing scripts
run: python3 -m unittest discover -s bin/licensing -p "test_*.py" -v
```
### Any related issues, documentation, discussions?
Follow-up to #4693
### How was this PR tested?
`python3 -m unittest discover -s bin/licensing -p "test_*.py" -v` runs
27 tests in 4ms, all passing. The new step in CI runs the same command
on every PR that exercises the `amber` job. Manually verified end-to-end
against the real combined LICENSE-binary built via
`concat_license_binary.py`.
### Was this PR authored or co-authored using generative AI tooling?
Generated-by: Claude Code (claude-opus-4-7)
---------
(backported from commit 9ece88e1a6a74a8df02da1746923f009e46c4b20)
Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
---
.github/workflows/build.yml | 5 +
bin/licensing/check_binary_deps.py | 149 +++++++------
bin/licensing/test_check_binary_deps.py | 358 ++++++++++++++++++++++++++++++++
3 files changed, 452 insertions(+), 60 deletions(-)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 79363ff1d4..94472a0a35 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -379,6 +379,11 @@ jobs:
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
+ - name: Unit-test licensing scripts
+ # Stdlib only, no install needed. Runs on every matrix row (3.10 →
+ # 3.13) so the script's behavior is guarded across all supported
+ # Python versions before the license check itself runs (3.12 only).
+ run: python3 -m unittest discover -s bin/licensing -p "test_*.py" -v
- name: Install dependencies
run: |
python -m pip install --upgrade pip
diff --git a/bin/licensing/check_binary_deps.py
b/bin/licensing/check_binary_deps.py
index 5b55c70e16..c85f093062 100755
--- a/bin/licensing/check_binary_deps.py
+++ b/bin/licensing/check_binary_deps.py
@@ -40,6 +40,7 @@ import json
import re
import sys
import tempfile
+from collections import defaultdict
from pathlib import Path
# Per-module LICENSE-binary files that the combined LICENSE-binary unions.
@@ -292,43 +293,52 @@ def collect_python(path: Path) -> set[str]:
SCALA_SUFFIX = re.compile(r"_\d+(?:\.\d+)+$")
-def _index_npm(items: set[str]) -> dict[str, str]:
- """{ '[email protected]', '@scope/[email protected]' } -> { 'react': '18.2.0',
'@scope/foo': '1.0' }."""
- out: dict[str, str] = {}
+def _index_npm(items: set[str]) -> dict[str, set[str]]:
+ """{ '[email protected]', '[email protected]' } -> { 'react': {'18.2.0', '17.0.0'} }.
+ Same name can legitimately appear at multiple versions when the bundle
+ pulls in two majors of a transitive dep."""
+ out: dict[str, set[str]] = defaultdict(set)
for entry in items:
# Last '@' is the version separator; '@' inside scoped names is at
index 0.
idx = entry.rfind("@")
if idx <= 0:
continue
- out[entry[:idx]] = entry[idx + 1:]
+ out[entry[:idx]].add(entry[idx + 1:])
return out
-def _index_python(items: set[str]) -> dict[str, str]:
- """{ 'numpy==2.1.0' } -> { 'numpy': '2.1.0' }."""
- out: dict[str, str] = {}
+def _index_python(items: set[str]) -> dict[str, set[str]]:
+ """{ 'numpy==2.1.0' } -> { 'numpy': {'2.1.0'} }."""
+ out: dict[str, set[str]] = defaultdict(set)
for entry in items:
if "==" not in entry:
continue
name, _, ver = entry.partition("==")
- out[name] = ver
+ out[name].add(ver)
return out
-def _index_jar(items: set[str]) -> dict[str, tuple[str, str]]:
- """{ 'netty-all-4.1.96.Final.jar' } -> { 'netty-all': ('4.1.96.Final',
'<basename>') }.
- Falls back to keying by the full basename when version extraction fails;
- such entries can still be flagged as added/stale but not as drift."""
- out: dict[str, tuple[str, str]] = {}
+def _index_jar(items: set[str]) -> dict[str, set[str]]:
+ """{ 'netty-all-4.1.96.Final.jar' } -> { 'netty-all': {'4.1.96.Final'} }.
+ Same shape as _index_npm / _index_python: an artifact legitimately
+ bundled at multiple versions (e.g. logback at 1.2.x in one service
+ and 1.4.x in another) survives intact. Unparseable jar names are
+ surfaced loudly rather than silently dropped — a parser bug here
+ means real bundled deps would skip license validation."""
+ out: dict[str, set[str]] = defaultdict(set)
for jar in items:
m = JAR_NAME_VERSION.match(jar)
- if m:
- out[m.group(1)] = (m.group(2), jar)
- else:
- out[jar] = ("", jar) # unparseable — version "" sentinel
+ if not m:
+ sys.stderr.write(f"warning: cannot parse jar name: {jar}\n")
+ continue
+ out[m.group(1)].add(m.group(2))
return out
+def _jar_basename(artifact: str, version: str) -> str:
+ return f"{artifact}-{version}.jar"
+
+
def _is_direct_jar(artifact: str, direct_artifacts: set[str]) -> bool:
"""sbt-native-packager's default JavaAppPackaging names dist jars
`<groupId>.<artifactId>-<version>.jar`, so the artifactId we extract from
@@ -384,13 +394,17 @@ def report(
print()
rc = 1
+ def _fmt_drift(entry: tuple[str, list[str], list[str]]) -> str:
+ name, cvers, rvers = entry
+ return f" ~ {name}: LICENSE-binary={', '.join(cvers)} bundled={',
'.join(rvers)}"
+
if drift_direct:
- print(f"DRIFT (direct) {label} — claimed version differs from
bundled:")
- for name, claimed_v, real_v in sorted(drift_direct):
- print(f" ~ {name}: LICENSE-binary={claimed_v} bundled={real_v}")
+ print(f"DRIFT (direct) {label} — claimed versions differ from
bundled:")
+ for entry in sorted(drift_direct):
+ print(_fmt_drift(entry))
print()
print("ACTION REQUIRED")
- print(f" Update LICENSE-binary to match the bundled version. Direct
deps")
+ print(f" Update LICENSE-binary to match the bundled versions. Direct
deps")
print(f" always block CI — a version bump may carry license changes.")
print()
rc = 1
@@ -398,18 +412,18 @@ def report(
if drift_transitive:
if ignore_transitive_version:
print(f"DRIFT (transitive, informational) {label}:")
- for name, claimed_v, real_v in sorted(drift_transitive):
- print(f" ~ {name}: LICENSE-binary={claimed_v}
bundled={real_v}")
+ for entry in sorted(drift_transitive):
+ print(_fmt_drift(entry))
print(f" (--ignore-transitive-version is set; nightly
exact-match")
print(f" check on main is responsible for refreshing these.)")
print()
else:
- print(f"DRIFT (transitive) {label} — claimed version differs from
bundled:")
- for name, claimed_v, real_v in sorted(drift_transitive):
- print(f" ~ {name}: LICENSE-binary={claimed_v}
bundled={real_v}")
+ print(f"DRIFT (transitive) {label} — claimed versions differ from
bundled:")
+ for entry in sorted(drift_transitive):
+ print(_fmt_drift(entry))
print()
print("ACTION REQUIRED")
- print(f" Update LICENSE-binary to match the bundled version, or
rerun")
+ print(f" Update LICENSE-binary to match the bundled versions, or
rerun")
print(f" with --ignore-transitive-version to treat transitive
drift as")
print(f" informational.")
print()
@@ -421,48 +435,63 @@ def report(
# --- main ------------------------------------------------------------------
def diff_simple(
- claim_idx: dict[str, str],
- real_idx: dict[str, str],
+ claim_idx: dict[str, set[str]],
+ real_idx: dict[str, set[str]],
direct_names: set[str],
-) -> tuple[list[str], list[str], list[tuple[str, str, str]], list[tuple[str,
str, str]]]:
- """Diff claims vs reality keyed by name. Same shape for npm/python."""
- added = sorted(real_idx.keys() - claim_idx.keys())
- stale = sorted(claim_idx.keys() - real_idx.keys())
- drift_direct: list[tuple[str, str, str]] = []
- drift_transitive: list[tuple[str, str, str]] = []
+ joiner: str,
+) -> tuple[list[str], list[str], list[tuple[str, list[str], list[str]]],
list[tuple[str, list[str], list[str]]]]:
+ """Diff name->{versions} multimaps for npm/python. `joiner` is the
+ separator used when rendering added/stale entries (`@` for npm, `==`
+ for python). Drifts are returned as (name, sorted_claimed_versions,
+ sorted_real_versions)."""
+ added: list[str] = []
+ stale: list[str] = []
+ drift_direct: list[tuple[str, list[str], list[str]]] = []
+ drift_transitive: list[tuple[str, list[str], list[str]]] = []
+
+ for name in sorted(real_idx.keys() - claim_idx.keys()):
+ for v in sorted(real_idx[name]):
+ added.append(f"{name}{joiner}{v}")
+ for name in sorted(claim_idx.keys() - real_idx.keys()):
+ for v in sorted(claim_idx[name]):
+ stale.append(f"{name}{joiner}{v}")
for name in sorted(claim_idx.keys() & real_idx.keys()):
- c, r = claim_idx[name], real_idx[name]
- if c != r:
- entry = (name, c, r)
- (drift_direct if name in direct_names else
drift_transitive).append(entry)
+ cvers, rvers = claim_idx[name], real_idx[name]
+ if cvers == rvers:
+ continue
+ entry = (name, sorted(cvers), sorted(rvers))
+ (drift_direct if name in direct_names else
drift_transitive).append(entry)
return added, stale, drift_direct, drift_transitive
def diff_jars(
- claim_idx: dict[str, tuple[str, str]],
- real_idx: dict[str, tuple[str, str]],
+ claim_idx: dict[str, set[str]],
+ real_idx: dict[str, set[str]],
direct_artifacts: set[str],
-) -> tuple[list[str], list[str], list[tuple[str, str, str]], list[tuple[str,
str, str]]]:
- """Like diff_simple but the index value is (version, jar_basename) so
- added/stale can be reported using the full basename users will see in
- LICENSE-binary."""
+) -> tuple[list[str], list[str], list[tuple[str, list[str], list[str]]],
list[tuple[str, list[str], list[str]]]]:
+ """Diff artifact->{versions} multimaps. Added/stale are rendered as
+ full jar basenames users will see in LICENSE-binary; drifts are
+ (artifact, sorted_claimed, sorted_real)."""
added: list[str] = []
stale: list[str] = []
- drift_direct: list[tuple[str, str, str]] = []
- drift_transitive: list[tuple[str, str, str]] = []
+ drift_direct: list[tuple[str, list[str], list[str]]] = []
+ drift_transitive: list[tuple[str, list[str], list[str]]] = []
+
for artifact in sorted(real_idx.keys() - claim_idx.keys()):
- added.append(real_idx[artifact][1])
+ for v in sorted(real_idx[artifact]):
+ added.append(_jar_basename(artifact, v))
for artifact in sorted(claim_idx.keys() - real_idx.keys()):
- stale.append(claim_idx[artifact][1])
+ for v in sorted(claim_idx[artifact]):
+ stale.append(_jar_basename(artifact, v))
for artifact in sorted(claim_idx.keys() & real_idx.keys()):
- cv, _cname = claim_idx[artifact]
- rv, _rname = real_idx[artifact]
- if cv != rv:
- entry = (artifact, cv, rv)
- if _is_direct_jar(artifact, direct_artifacts):
- drift_direct.append(entry)
- else:
- drift_transitive.append(entry)
+ cvers, rvers = claim_idx[artifact], real_idx[artifact]
+ if cvers == rvers:
+ continue
+ entry = (artifact, sorted(cvers), sorted(rvers))
+ if _is_direct_jar(artifact, direct_artifacts):
+ drift_direct.append(entry)
+ else:
+ drift_transitive.append(entry)
return added, stale, drift_direct, drift_transitive
@@ -514,7 +543,7 @@ def main() -> int:
claimed = parse_prose(lb, "npm")
reality = collect_npm(Path(args.inputs[0]))
direct = load_direct_npm("frontend/package.json")
- added, stale, dd, dt = diff_simple(_index_npm(claimed),
_index_npm(reality), direct)
+ added, stale, dd, dt = diff_simple(_index_npm(claimed),
_index_npm(reality), direct, joiner="@")
rc = report(added, stale, dd, dt, "npm packages", "npm",
args.ignore_transitive_version)
if rc == 0:
print(f"OK: {len(reality)} npm packages match LICENSE-binary.")
@@ -524,7 +553,7 @@ def main() -> int:
claimed = parse_prose(lb, "agent-npm")
reality = collect_npm(Path(args.inputs[0]))
direct = load_direct_npm("agent-service/package.json")
- added, stale, dd, dt = diff_simple(_index_npm(claimed),
_index_npm(reality), direct)
+ added, stale, dd, dt = diff_simple(_index_npm(claimed),
_index_npm(reality), direct, joiner="@")
rc = report(added, stale, dd, dt, "agent-service npm packages",
"agent-npm", args.ignore_transitive_version)
if rc == 0:
print(f"OK: {len(reality)} agent-service npm packages match
LICENSE-binary.")
@@ -534,7 +563,7 @@ def main() -> int:
claimed = parse_prose(lb, "python")
reality = collect_python(Path(args.inputs[0]))
direct = load_direct_python()
- added, stale, dd, dt = diff_simple(_index_python(claimed),
_index_python(reality), direct)
+ added, stale, dd, dt = diff_simple(_index_python(claimed),
_index_python(reality), direct, joiner="==")
rc = report(added, stale, dd, dt, "Python packages", "python",
args.ignore_transitive_version)
if rc == 0:
print(f"OK: {len(reality)} Python packages match LICENSE-binary.")
diff --git a/bin/licensing/test_check_binary_deps.py
b/bin/licensing/test_check_binary_deps.py
new file mode 100644
index 0000000000..36fbb0c41f
--- /dev/null
+++ b/bin/licensing/test_check_binary_deps.py
@@ -0,0 +1,358 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Unit tests for check_binary_deps.
+
+Run via:
+ python3 -m unittest bin.licensing.test_check_binary_deps
+or:
+ python3 -m unittest discover -s bin/licensing -p "test_*.py"
+
+These tests use only the Python standard library — no pytest, no project
+deps — so they can run in any CI job that has Python set up.
+"""
+from __future__ import annotations
+
+import csv
+import io
+import sys
+import tempfile
+import textwrap
+import unittest
+from contextlib import redirect_stderr, redirect_stdout
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+import check_binary_deps as cbd # noqa: E402
+
+
+# --- pure-function tests ---------------------------------------------------
+
+
+class IndexersPreserveAllVersions(unittest.TestCase):
+ """Regression test for the bug where dict-keyed-by-name indexers
+ silently dropped a duplicate-name entry. The combined LICENSE-binary
+ on `main` legitimately claims the same artifact at two versions in
+ 97 cases (e.g. logback 1.2.x + 1.4.x); the indexer must preserve all
+ of them."""
+
+ def test_index_npm_keeps_multiple_versions(self):
+ idx = cbd._index_npm({"[email protected]", "[email protected]",
"[email protected]"})
+ self.assertEqual(idx["react"], {"17.0.0", "18.2.0"})
+ self.assertEqual(idx["lodash"], {"4.17.21"})
+
+ def test_index_npm_handles_scoped_names(self):
+ # `@scope/name@version` — the version separator is the LAST `@`.
+ idx = cbd._index_npm({"@angular/[email protected]", "@angular/[email protected]"})
+ self.assertEqual(idx["@angular/core"], {"17.0.0", "18.0.0"})
+
+ def test_index_python_keeps_multiple_versions(self):
+ idx = cbd._index_python({"numpy==2.1.0", "numpy==2.0.0",
"pandas==2.2.3"})
+ self.assertEqual(idx["numpy"], {"2.0.0", "2.1.0"})
+ self.assertEqual(idx["pandas"], {"2.2.3"})
+
+ def test_index_jar_keeps_multiple_versions(self):
+ # Two versions of the same artifact, plus an unrelated one.
+ idx = cbd._index_jar({
+ "ch.qos.logback.logback-classic-1.2.3.jar",
+ "ch.qos.logback.logback-classic-1.4.14.jar",
+ "io.netty.netty-buffer-4.1.96.Final.jar",
+ })
+ self.assertEqual(
+ idx["ch.qos.logback.logback-classic"], {"1.2.3", "1.4.14"}
+ )
+ self.assertEqual(idx["io.netty.netty-buffer"], {"4.1.96.Final"})
+
+ def test_index_jar_warns_on_unparseable_name(self):
+ buf = io.StringIO()
+ with redirect_stderr(buf):
+ idx = cbd._index_jar({"weird-no-version.jar"})
+ self.assertEqual(idx, {})
+ self.assertIn("cannot parse jar name", buf.getvalue())
+
+
+class JarBasenameRoundTrip(unittest.TestCase):
+ """`_jar_basename` must reconstruct the exact basename that
+ `JAR_NAME_VERSION` parsed, including for jars with classifier
+ suffixes (the version regex captures the whole tail)."""
+
+ def test_round_trip_simple(self):
+ for jar in [
+ "io.netty.netty-buffer-4.1.96.Final.jar",
+ "commons-cli-1.5.0.jar",
+ "scala-library-2.13.10.jar",
+
"io.netty.netty-tcnative-boringssl-static-2.0.61.Final-linux-x86_64.jar",
+ "co.fs2.fs2-core_2.13-3.12.2.jar",
+ ]:
+ with self.subTest(jar=jar):
+ m = cbd.JAR_NAME_VERSION.match(jar)
+ self.assertIsNotNone(m, f"failed to parse {jar}")
+ self.assertEqual(cbd._jar_basename(m.group(1), m.group(2)),
jar)
+
+
+class IsDirectJar(unittest.TestCase):
+ """`_is_direct_jar` reconciles SBT's bare artifactId with
+ sbt-native-packager's `<groupId>.<artifactId>-<version>.jar` jar
+ naming, plus Scala's `_<scalaVer>` suffix on `%%` libs."""
+
+ direct = {"netty-buffer", "jersey-common", "fs2-core"}
+
+ def test_group_prefixed(self):
+ self.assertTrue(cbd._is_direct_jar("io.netty.netty-buffer",
self.direct))
+
self.assertTrue(cbd._is_direct_jar("org.glassfish.jersey.core.jersey-common",
self.direct))
+
+ def test_bare_artifact(self):
+ self.assertTrue(cbd._is_direct_jar("netty-buffer", self.direct))
+
+ def test_scala_suffix_on_group_prefixed(self):
+ self.assertTrue(cbd._is_direct_jar("co.fs2.fs2-core_2.13",
self.direct))
+
+ def test_unknown_artifact(self):
+ self.assertFalse(cbd._is_direct_jar("some.thing.unrelated",
self.direct))
+ self.assertFalse(cbd._is_direct_jar("unrelated", self.direct))
+
+
+class DiffSimple(unittest.TestCase):
+ """`diff_simple` (npm/python): added/stale must include version,
+ drift must be reported per-name with both version sets."""
+
+ def test_clean_no_diff(self):
+ idx = {"a": {"1.0"}, "b": {"2.0"}}
+ added, stale, dd, dt = cbd.diff_simple(idx, idx, set(), joiner="==")
+ self.assertEqual((added, stale, dd, dt), ([], [], [], []))
+
+ def test_added_and_stale_include_version(self):
+ claim = {"a": {"1.0"}}
+ real = {"b": {"2.0"}}
+ added, stale, dd, dt = cbd.diff_simple(claim, real, set(), joiner="==")
+ self.assertEqual(added, ["b==2.0"])
+ self.assertEqual(stale, ["a==1.0"])
+ self.assertEqual(dd, [])
+ self.assertEqual(dt, [])
+
+ def test_added_and_stale_emit_one_entry_per_version(self):
+ # Brand-new package bundled at two versions: surface both.
+ claim = {}
+ real = {"newpkg": {"1.0", "2.0"}}
+ added, stale, dd, dt = cbd.diff_simple(claim, real, set(), joiner="==")
+ self.assertEqual(added, ["newpkg==1.0", "newpkg==2.0"])
+
+ def test_single_version_drift_classified_direct_vs_transitive(self):
+ claim = {"foo": {"1.0"}, "bar": {"1.0"}}
+ real = {"foo": {"1.1"}, "bar": {"1.1"}}
+ added, stale, dd, dt = cbd.diff_simple(claim, real, {"foo"},
joiner="==")
+ self.assertEqual(added, [])
+ self.assertEqual(stale, [])
+ self.assertEqual(dd, [("foo", ["1.0"], ["1.1"])])
+ self.assertEqual(dt, [("bar", ["1.0"], ["1.1"])])
+
+ def test_multi_version_drift_reports_both_sides(self):
+ # The bug this PR fixes: previously these collapsed.
+ claim = {"jetty": {"9.4.20", "11.0.20"}}
+ real = {"jetty": {"9.4.20", "11.0.21"}}
+ _, _, _, dt = cbd.diff_simple(claim, real, set(), joiner="==")
+ self.assertEqual(dt, [("jetty", ["11.0.20", "9.4.20"], ["11.0.21",
"9.4.20"])])
+
+ def test_npm_joiner(self):
+ claim, real = {}, {"react": {"18.2.0"}}
+ added, _, _, _ = cbd.diff_simple(claim, real, set(), joiner="@")
+ self.assertEqual(added, ["[email protected]"])
+
+
+class DiffJars(unittest.TestCase):
+ """`diff_jars`: same shape as diff_simple but added/stale are full
+ jar basenames (reconstructed via `_jar_basename`), and direct/
+ transitive classification uses `_is_direct_jar`."""
+
+ def test_clean(self):
+ idx = {"io.netty.netty-buffer": {"4.1.96.Final"}}
+ added, stale, dd, dt = cbd.diff_jars(idx, idx, set())
+ self.assertEqual((added, stale, dd, dt), ([], [], [], []))
+
+ def test_added_stale_use_full_basename(self):
+ claim = {"a.b": {"1.0"}}
+ real = {"x.y": {"2.0"}}
+ added, stale, _, _ = cbd.diff_jars(claim, real, set())
+ self.assertEqual(added, ["x.y-2.0.jar"])
+ self.assertEqual(stale, ["a.b-1.0.jar"])
+
+ def test_multi_version_added_stale_emits_one_basename_per_version(self):
+ claim = {}
+ real = {"io.netty.netty-buffer": {"4.1.96.Final", "4.1.100.Final"}}
+ added, _, _, _ = cbd.diff_jars(claim, real, set())
+ self.assertEqual(
+ added,
+ [
+ "io.netty.netty-buffer-4.1.100.Final.jar",
+ "io.netty.netty-buffer-4.1.96.Final.jar",
+ ],
+ )
+
+ def test_drift_direct_vs_transitive_via_group_prefixed_match(self):
+ # `netty-buffer` is direct (declared in SBT bare); the bundled jar
+ # is `io.netty.netty-buffer` (sbt-native-packager naming).
+ claim = {
+ "io.netty.netty-buffer": {"4.1.96.Final"},
+ "org.unknown.thing": {"1.0"},
+ }
+ real = {
+ "io.netty.netty-buffer": {"4.1.100.Final"},
+ "org.unknown.thing": {"1.1"},
+ }
+ _, _, dd, dt = cbd.diff_jars(claim, real, {"netty-buffer"})
+ self.assertEqual(dd, [("io.netty.netty-buffer", ["4.1.96.Final"],
["4.1.100.Final"])])
+ self.assertEqual(dt, [("org.unknown.thing", ["1.0"], ["1.1"])])
+
+
+# --- end-to-end tests ------------------------------------------------------
+
+
+def _write_lb(text: str) -> Path:
+ p = Path(tempfile.mkstemp(suffix=".txt")[1])
+ p.write_text(text)
+ return p
+
+
+def _write_pip_csv(rows: list[tuple[str, str]]) -> Path:
+ p = Path(tempfile.mkstemp(suffix=".csv")[1])
+ with p.open("w", newline="") as f:
+ w = csv.writer(f)
+ w.writerow(["Name", "Version", "License"])
+ for name, ver in rows:
+ w.writerow([name, ver, "BSD"])
+ return p
+
+
+# Synthetic LICENSE-binary fixture mirroring the per-module file format
+# (Apache-2 / MIT divider lines + `Python packages:` header + bullets).
+# Two python packages claimed: one at one version, one at two versions.
+SYNTHETIC_LB = textwrap.dedent("""\
+ Apache header etc.
+
+
--------------------------------------------------------------------------------
+ Dependencies under the Apache License, Version 2.0
+
--------------------------------------------------------------------------------
+
+ Python packages:
+ - direct-pkg==1.0.0
+ - transitive-pkg==2.0.0
+ - transitive-pkg==2.5.0
+""")
+
+
+class EndToEndPython(unittest.TestCase):
+ """Run main() against a synthetic LICENSE-binary + pip-licenses CSV
+ and assert the exit codes for each behavior class."""
+
+ def setUp(self):
+ self.lb = _write_lb(SYNTHETIC_LB)
+
+ def _run(self, csv_rows: list[tuple[str, str]], *flags: str) -> int:
+ # main() reads sys.argv; route stdout/stderr through buffers so
+ # failures don't pollute test output.
+ csv_path = _write_pip_csv(csv_rows)
+ argv_save = sys.argv
+ sys.argv = [
+ "x", "--license-binary", str(self.lb), *flags,
+ "python", str(csv_path),
+ ]
+ # Patch direct-deps loader to a known set rather than reading
+ # the real repo's requirements.txt.
+ loader_save = cbd.load_direct_python
+ cbd.load_direct_python = lambda: {"direct-pkg"}
+ try:
+ with redirect_stdout(io.StringIO()),
redirect_stderr(io.StringIO()):
+ return cbd.main()
+ finally:
+ sys.argv = argv_save
+ cbd.load_direct_python = loader_save
+
+ def test_clean_passes(self):
+ # Reality matches all 3 claimed (name, version) pairs.
+ rc = self._run([
+ ("direct-pkg", "1.0.0"),
+ ("transitive-pkg", "2.0.0"),
+ ("transitive-pkg", "2.5.0"),
+ ])
+ self.assertEqual(rc, 0)
+
+ def test_transitive_drift_strict_fails(self):
+ rc = self._run([
+ ("direct-pkg", "1.0.0"),
+ ("transitive-pkg", "2.0.0"),
+ ("transitive-pkg", "2.6.0"), # bumped from 2.5.0
+ ])
+ self.assertEqual(rc, 1)
+
+ def test_transitive_drift_with_flag_passes(self):
+ rc = self._run(
+ [
+ ("direct-pkg", "1.0.0"),
+ ("transitive-pkg", "2.0.0"),
+ ("transitive-pkg", "2.6.0"),
+ ],
+ "--ignore-transitive-version",
+ )
+ self.assertEqual(rc, 0)
+
+ def test_direct_drift_with_flag_still_fails(self):
+ rc = self._run(
+ [
+ ("direct-pkg", "1.1.0"), # bumped
+ ("transitive-pkg", "2.0.0"),
+ ("transitive-pkg", "2.5.0"),
+ ],
+ "--ignore-transitive-version",
+ )
+ self.assertEqual(rc, 1)
+
+ def test_added_with_flag_still_fails(self):
+ rc = self._run(
+ [
+ ("direct-pkg", "1.0.0"),
+ ("transitive-pkg", "2.0.0"),
+ ("transitive-pkg", "2.5.0"),
+ ("brand-new", "9.9.9"), # not claimed
+ ],
+ "--ignore-transitive-version",
+ )
+ self.assertEqual(rc, 1)
+
+ def test_stale_with_flag_still_fails(self):
+ # Drop both versions of transitive-pkg from reality.
+ rc = self._run(
+ [("direct-pkg", "1.0.0")],
+ "--ignore-transitive-version",
+ )
+ self.assertEqual(rc, 1)
+
+ def test_dropping_one_of_multi_versions_is_drift_not_stale(self):
+ # transitive-pkg is still in reality (at one version); the missing
+ # version is drift — passes with the flag.
+ rc = self._run(
+ [
+ ("direct-pkg", "1.0.0"),
+ ("transitive-pkg", "2.5.0"),
+ ],
+ "--ignore-transitive-version",
+ )
+ self.assertEqual(rc, 0)
+
+
+if __name__ == "__main__":
+ unittest.main()