commit:     c4994d68dc02a69456434e180d4b4a6e440a6307
Author:     Brian Harring <ferringb <AT> gmail <DOT> com>
AuthorDate: Mon Nov 24 18:12:34 2025 +0000
Commit:     Brian Harring <ferringb <AT> gmail <DOT> com>
CommitDate: Mon Nov 24 19:46:16 2025 +0000
URL:        https://gitweb.gentoo.org/proj/pkgcore/pkgcheck.git/commit/?id=c4994d68

feat: test_pkg_scan: output debugging info, allow custom hooks

First: output the source path of the expected result, since it's a
pain to debug this if you're an external user.  This states exactly
which file the expectation came from.

Second: add the ability to use python code to filter out results.  This
can be used alongside the normal expected.json mechanism, and there are
checks that try to block accidental suppression across the testdata.
A custom handler is added as handler.py in the fixture directory,
exposing a 'handler' callable with the signature
typing.Callable[[Result], bool].
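
As a minimal sketch (the fixture path and the _known name are
illustrative; the real example added by this commit lives in the
EclassBashSyntaxError testdata below):

    # testdata/data/repos/<repo>/<check>/<keyword>/handler.py
    from pkgcheck.reporters import JsonStream

    # Parse the result we still want to accept; this is the same JSON
    # stream format that expected.json uses.
    _known = list(
        JsonStream.from_iter(
            [
                '{"__class__": "EclassBashSyntaxError", "eclass": "bad",'
                ' "lineno": "12", "error": "syntax error: unexpected end of file"}'
            ]
        )
    )[0]


    def handler(result) -> bool:
        # Return True for every result this fixture considers acceptable.
        return result == _known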

Third: fix https://github.com/pkgcore/pkgcheck/issues/760, which
motivated this work.  Between bash 5.2 and 5.3 the error output
improved, so a literal error json can no longer be stored.  Instead a
custom bit of python can be written to do the enforcement, which is
added here.
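
For illustration only (the shipped handler compares full result
objects; _accepted_errors and the attribute check here are a looser
sketch of the same idea), a hook tolerating either wording could look
like:

    # accept both bash wordings for the same eclass syntax error
    _accepted_errors = {
        # wording from older bash
        "syntax error: unexpected end of file",
        # improved wording from bash 5.3
        "syntax error: unexpected end of file from `{' command on line 11",
    }


    def handler(result) -> bool:
        return getattr(result, "error", None) in _accepted_errors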

Fourth: add as many sanity checks against the handlers as I can think
of.  Since they're python code, adding a new handler can lead to
missing/unknown results; thus things are wired to identify the offender
as precisely as possible, so debugging failures like this is less of a
heavy lift.

Close: pkgcore/pkgcheck#760
Signed-off-by: Brian Harring <ferringb <AT> gmail.com>

 .../EclassBashSyntaxError/expected.json            |   1 -
 .../EclassCheck/EclassBashSyntaxError/handler.py   |  19 +++
 tests/scripts/test_pkgcheck_scan.py                | 155 +++++++++++++++------
 3 files changed, 134 insertions(+), 41 deletions(-)

diff --git a/testdata/data/repos/eclass/EclassCheck/EclassBashSyntaxError/expected.json b/testdata/data/repos/eclass/EclassCheck/EclassBashSyntaxError/expected.json
deleted file mode 100644
index 526338b9..00000000
--- a/testdata/data/repos/eclass/EclassCheck/EclassBashSyntaxError/expected.json
+++ /dev/null
@@ -1 +0,0 @@
-{"__class__": "EclassBashSyntaxError", "eclass": "bad", "lineno": "12", 
"error": "syntax error: unexpected end of file"}

diff --git a/testdata/data/repos/eclass/EclassCheck/EclassBashSyntaxError/handler.py b/testdata/data/repos/eclass/EclassCheck/EclassBashSyntaxError/handler.py
new file mode 100644
index 00000000..8c05e139
--- /dev/null
+++ b/testdata/data/repos/eclass/EclassCheck/EclassBashSyntaxError/handler.py
@@ -0,0 +1,19 @@
+import copy
+
+# TODO: rip this out and just integrate json registries properly
+from pkgcheck.reporters import JsonStream
+
+old_bash_result = list(
+    JsonStream.from_iter(
+        [
+            '{"__class__": "EclassBashSyntaxError", "eclass": "bad", "lineno": 
"12", "error": "syntax error: unexpected end of file"}'
+        ]
+    )
+)[0]
+
+new_bash_result = copy.deepcopy(old_bash_result)
+new_bash_result.error = "syntax error: unexpected end of file from `{' command on line 11"  # pyright: ignore[reportAttributeAccessIssue]
+
+
+def handler(result) -> bool:
+    return result == old_bash_result or result == new_bash_result

diff --git a/tests/scripts/test_pkgcheck_scan.py b/tests/scripts/test_pkgcheck_scan.py
index b282d3da..218efff7 100644
--- a/tests/scripts/test_pkgcheck_scan.py
+++ b/tests/scripts/test_pkgcheck_scan.py
@@ -1,4 +1,5 @@
-import contextlib
+import importlib
+import importlib.machinery
 import io
 import os
 import pathlib
@@ -594,13 +595,37 @@ class TestPkgcheckScan:
 
     @dataclass
     class _expected_data_result:
-        expected: typing.Iterable[Result]
-        expected_verbose: typing.Iterable[Result]
+        expected: dict[Result, pathlib.Path]
+        expected_verbose: dict[Result, pathlib.Path]
+        custom_filter: typing.Callable[[Result], bool] | None
 
-    def _load_expected_data(self, path: str) -> _expected_data_result:
+    def _load_expected_data(self, base: pathlib.Path) -> _expected_data_result:
         """Return the set of result objects from a given json stream file."""
 
-        def boilerplate(path, allow_missing: bool) -> list[Result]:
+        custom_handler = None
+        try:
+            with (custom_handler_path := base / "handler.py").open() as f:
+                # We can't import since it's not a valid python directory layout,
+                # nor do we want to pollute the namespace.
+                module = importlib.machinery.SourceFileLoader(
+                    "handler", str(custom_handler_path)
+                ).load_module()
+                if (
+                    custom_handler := typing.cast(
+                        typing.Callable[[Result], bool], getattr(module, "handler")
+                    )
+                ) is None:
+                    pytest.fail(
+                        f"custom python handler {custom_handler_path!r} lacks 
the necessary handle function or list of handlers"
+                    )
+
+                if not callable(custom_handler):
+                    pytest.fail(f"{custom_handler_path} handler isn't 
invokable")
+                custom_handler.__source_path__ = custom_handler_path  # 
pyright: ignore[reportFunctionMemberAccess]
+        except FileNotFoundError:
+            pass
+
+        def boilerplate(path, allow_missing: bool):
             try:
                 with path.open() as f:
                     data = list(reporters.JsonStream.from_iter(f))
@@ -614,22 +639,25 @@ class TestPkgcheckScan:
                     # Remove this after cleaning the scan/fix logic up to not force duplicate
                     # renders, and instead just work with a result stream directly.
                     assert self._render_results(data), f"failed rendering results {data!r}"
-                    return data
+                    return typing.cast(dict[Result, pathlib.Path], {}.fromkeys(data, path))
 
             except FileNotFoundError:
                 if not allow_missing:
                     raise
-                return []
-
-        expected_path = self.repos_data / path / "expected.json"
-
-        expected = boilerplate(expected_path, False)
-        assert expected, f"regular results must always exist if the file exists: {expected_path}"
+                return {}
+
+        expected_path = base / "expected.json"
+        # if a custom handler exists, then the json definitions aren't required.
+        expected = boilerplate(expected_path, custom_handler is not None)
+        if custom_handler is None:
+            assert expected, (
+                f"regular results must always exist if the file exists: 
{expected_path}"
+            )
 
-        expected_verbose_path = self.repos_data / path / "expected-verbose.json"
+        expected_verbose_path = base / "expected-verbose.json"
         expected_verbose = boilerplate(expected_verbose_path, True)
 
-        return self._expected_data_result(expected, expected_verbose=expected_verbose)
+        return self._expected_data_result(expected, expected_verbose, custom_handler)
 
     def _render_results(self, results, **kwargs) -> str:
         """Render a given set of result objects into their related string 
form."""
@@ -642,41 +670,88 @@ class TestPkgcheckScan:
     @pytest.mark.parametrize("repo", repos)
     def test_scan_repo(self, repo, tmp_path_factory):
         """Run pkgcheck against test pkgs in bundled repo, verifying result 
output."""
-        expected_results = set()
+
+        # _sources is so people debugging failures know where the testdata came from.  It matters for devex.
+        expected_results_sources = {}
         scan_results = self._scan_results(repo, tmp_path_factory.mktemp("scan"), verbosity=0)
 
-        expected_verbose_results = set()
+        expected_verbose_results_sources = {}
         scan_verbose_results = self._scan_results(repo, tmp_path_factory.mktemp("ver"), verbosity=1)
 
+        custom_handlers: list[typing.Callable[[Result], bool]] = []
+
         for check, keywords in self._checks[repo].items():
             for keyword in keywords:
-                data = self._load_expected_data(f"{repo}/{check}/{keyword}")
-                expected_results.update(data.expected)
+                path = self.repos_data / repo / check / keyword
+                data = self._load_expected_data(path)
+                if conflict := {
+                    k: [v, expected_results_sources[k]]
+                    for k, v in data.expected.items()
+                    if k in expected_results_sources
+                }:
+                    pytest.fail(
+                        f"conflicting results found in testdata for the 
following fixtures: {conflict!r}"
+                    )
+
+                expected_results_sources.update(data.expected)
 
                 if data.expected_verbose:
-                    expected_verbose_results.update(data.expected_verbose)
+                    expected_verbose_results_sources.update(data.expected_verbose)
                 else:
-                    expected_verbose_results.update(data.expected)
-
-        if expected_results != scan_results:
-            missing = self._render_results(expected_results - scan_results)
-            unknown = self._render_results(scan_results - expected_results)
-            error = ["unmatched repo scan results:\n\n"]
-            if missing:
-                error.append(f"{repo} repo missing expected 
results:\n{missing}")
-            if unknown:
-                error.append(f"{repo} repo unknown results:\n{unknown}")
-            pytest.fail("\n".join(error), pytrace=False)
-
-        if expected_verbose_results != scan_verbose_results:
-            missing = self._render_results(expected_verbose_results - scan_verbose_results)
-            unknown = self._render_results(scan_verbose_results - expected_verbose_results)
-            error = ["unmatched verbose repo scan results:\n\n"]
-            if missing:
-                error.append(f"{repo} repo missing expected 
results:\n{missing}")
-            if unknown:
-                error.append(f"{repo} repo unknown results:\n{unknown}")
-            pytest.fail("\n".join(error), pytrace=False)
+                    expected_verbose_results_sources.update(data.expected)
+
+                if data.custom_filter is not None:
+                    custom_handlers.append(data.custom_filter)
+
+        for handler in custom_handlers:
+            try:
+                # sanity checks; both that they don't intersect expected results from other testdata,
+                # and also do the filtering.
+                for verbose_text, expected, actual in (
+                    ("", expected_results_sources, scan_results),
+                    ("verbose ", expected_verbose_results_sources, 
scan_verbose_results),
+                ):
+                    if intersection := list(filter(handler, expected)):
+                        pytest.fail(
+                            f"handler from {handler.__source_file__!r} 
incorrectly suppresses {verbose_text}test data: {intersection}"  # pyright: 
ignore[reportFunctionMemberAccess]
+                        )
+
+                    for k in list(filter(handler, actual)):
+                        actual.remove(k)
+
+            except Exception as e:
+                pytest.fail(
+                    f"handler {data.custom_filter.__source_path__!r} threw an 
exception: {e!r}"  # type: ignore
+                )
+
+        def assert_same(sources, results, verbose=False):
+            expected = set(sources)
+            errors = []
+            if missing := expected.difference(results):
+                lines = [
+                    f"from source: {sources[x]}, 
expected:\n{self._render_results([x])}"
+                    for x in missing
+                ]
+                errors.append("\n".join(lines))
+                for handler in custom_handlers:
+                    for result in missing:
+                        if handler(result):
+                            errors.append(
+                                f"possible cause: handler 
{handler.__source_file__} matches"  # pyright: 
ignore[reportFunctionMemberAccess]
+                            )
+
+            if unknown := results.difference(sources):
+                text = self._render_results(unknown)
+                errors.append(f"unknown results:\n{text}")
+
+            if errors:
+                verbose = "verbose " if verbose else ""
+                pytest.fail(
+                    f"repo {repo} {verbose}scan failures:\n" + 
"\n\n".join(errors), pytrace=False
+                )
+
+        assert_same(expected_results_sources, scan_results)
+        assert_same(expected_verbose_results_sources, scan_verbose_results, True)
 
     @staticmethod
     def _patch(fix, repo_path):
