This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git


The following commit(s) were added to refs/heads/main by this push:
     new 9b56ffd  generate-cve-json: derive project_dir from package_pattern regex (#161)
9b56ffd is described below

commit 9b56ffdee4778b04e5318540d499b06d6bcb4971
Author: Jarek Potiuk <[email protected]>
AuthorDate: Fri May 15 03:33:48 2026 +0200

    generate-cve-json: derive project_dir from package_pattern regex (#161)
    
    `_product_for_package` was anchored on a hardcoded
    `PROJECT_PREFIX = f"{TOP_LEVEL_NAME}-project-"`, which only matches
    projects whose subpackages happen to live under `-project-<dir>`.
    Projects that ship subpackages under a different prefix —
    `-providers-<dir>` is the realistic example shipped by Apache
    Airflow on PyPI — saw the prefix check fail and fell through to
    returning the raw PyPI package name as `product`. The project's
    `project_display_map` and `project_product_template` were silently
    ignored even when the corresponding `<project>` group was correctly
    captured by `package_pattern`.
    
    The fix uses the `project` named group from the configured
    `package_pattern` regex instead of slicing on a hardcoded prefix.
    The prefix is now a project-level convention encoded in the regex,
    not a generator constant. Adopters can use whatever subpackage
    prefix matches their PyPI taxonomy (`-project-`, `-providers-`, …)
    and the display-map lookup fires for any of them.
    
    `PROJECT_PREFIX` is dropped entirely; nothing else in the codebase
    read it.
    
    Adds a regression fixture (`cve-json-config-providers.toml`) and a
    `TestProductForPackageProvidersStyle` class exercising a
    `-providers-<dir>` taxonomy, so a future revert of this fix would
    fail loudly.
    
    Generated-by: Claude Code (Claude Opus 4.7)
---
 .../src/generate_cve_json/cve_json.py              | 31 +++++-----
 .../tests/fixtures/cve-json-config-providers.toml  | 60 +++++++++++++++++++
 .../tests/test_generate_cve_json.py                | 67 ++++++++++++++++++++++
 3 files changed, 143 insertions(+), 15 deletions(-)

diff --git a/tools/vulnogram/generate-cve-json/src/generate_cve_json/cve_json.py b/tools/vulnogram/generate-cve-json/src/generate_cve_json/cve_json.py
index 339a644..79a57de 100644
--- a/tools/vulnogram/generate-cve-json/src/generate_cve_json/cve_json.py
+++ b/tools/vulnogram/generate-cve-json/src/generate_cve_json/cve_json.py
@@ -177,7 +177,6 @@ def _populate_constants() -> None:
     global DEFAULT_ASF_ORG_ID, GENERATOR_TAG, SKILL_SOURCE_URL
     global PROJECT_DISPLAY_MAP, PACKAGE_RE
     global TOP_LEVEL_NAME, TOP_LEVEL_PRODUCT, PROJECT_PRODUCT_TEMPLATE
-    global PROJECT_PREFIX
     global CNA_PRIVATE_PROJECT_URL, CNA_PRIVATE_OWNER, CNA_PRIVATE_USERS_LIST
     global TITLE_STRIP_RE, TRACKER_FILTER_TOKEN
 
@@ -196,9 +195,6 @@ def _populate_constants() -> None:
     TOP_LEVEL_NAME = cfg["packages"]["top_level_name"]
     TOP_LEVEL_PRODUCT = cfg["packages"]["top_level_product"]
     PROJECT_PRODUCT_TEMPLATE = cfg["packages"]["project_product_template"]
-    # Convenient derived constant: the prefix used to detect
-    # "project" subpackages (e.g. ``<top-level>-project-``).
-    PROJECT_PREFIX = f"{TOP_LEVEL_NAME}-project-"
 
     # Per-project CVE 5.x `CNA_private` envelope fields. These end up
     # in every CVE record this tool generates for the project, and
@@ -244,7 +240,6 @@ PACKAGE_RE: re.Pattern[str] = re.compile("")
 TOP_LEVEL_NAME: str = ""
 TOP_LEVEL_PRODUCT: str = ""
 PROJECT_PRODUCT_TEMPLATE: str = ""
-PROJECT_PREFIX: str = ""
 CNA_PRIVATE_PROJECT_URL: str = ""
 CNA_PRIVATE_OWNER: str = ""
 CNA_PRIVATE_USERS_LIST: str = ""
@@ -684,11 +679,15 @@ def _product_for_package(
 
     * `package_name == TOP_LEVEL_NAME` → `TOP_LEVEL_PRODUCT` (e.g.
       `apache-airflow` → `Apache Airflow`).
-    * `package_name` matches `<TOP_LEVEL_NAME>-project-<dir>` →
+    * `package_name` matches `PACKAGE_RE` and the regex captures a
+      non-empty named `project` group →
       `PROJECT_PRODUCT_TEMPLATE.format(display=...)`, where `display`
-      is `PROJECT_DISPLAY_MAP[<dir>]` when the directory name is
+      is `PROJECT_DISPLAY_MAP[<project>]` when the directory name is
       known, or a title-cased dash-split fallback otherwise
-      (`foo-bar` → `Foo Bar`).
+      (`foo-bar` → `Foo Bar`). The `project` group encodes the
+      project's subpackage convention (`-project-<dir>`,
+      `-providers-<dir>`, …) — whatever the regex declares — so the
+      mapping works for any subpackage prefix the project ships.
     * `product_overrides` lets callers shadow either source by package
       name — used by the `--product-for` CLI flag for unknown
       subpackages or acronyms that don't round-trip through `title()`.
@@ -705,13 +704,15 @@ def _product_for_package(
         return overrides[package_name]
     if package_name == TOP_LEVEL_NAME:
         return TOP_LEVEL_PRODUCT
-    if package_name.startswith(PROJECT_PREFIX):
-        project_dir = package_name[len(PROJECT_PREFIX) :]
-        if project_dir in PROJECT_DISPLAY_MAP:
-            display = PROJECT_DISPLAY_MAP[project_dir]
-        else:
-            display = " ".join(part.title() for part in project_dir.split("-") if part)
-        return PROJECT_PRODUCT_TEMPLATE.format(display=display)
+    match = PACKAGE_RE.fullmatch(package_name) or PACKAGE_RE.match(package_name)
+    if match is not None:
+        project_dir = (match.groupdict().get("project") or "").strip()
+        if project_dir:
+            if project_dir in PROJECT_DISPLAY_MAP:
+                display = PROJECT_DISPLAY_MAP[project_dir]
+            else:
+                display = " ".join(part.title() for part in project_dir.split("-") if part)
+            return PROJECT_PRODUCT_TEMPLATE.format(display=display)
     return package_name
 
 
diff --git a/tools/vulnogram/generate-cve-json/tests/fixtures/cve-json-config-providers.toml b/tools/vulnogram/generate-cve-json/tests/fixtures/cve-json-config-providers.toml
new file mode 100644
index 0000000..e691eb8
--- /dev/null
+++ b/tools/vulnogram/generate-cve-json/tests/fixtures/cve-json-config-providers.toml
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# generate-cve-json — TEST FIXTURE config (regression coverage for
+# subpackage prefixes other than `-project-`).
+#
+# This fixture mirrors a project that ships subpackages under a
+# `-providers-<dir>` convention (the shape used by, for example,
+# Apache Airflow's third-party-integration packages on PyPI). The
+# `_product_for_package` lookup must read the `project` group from
+# whatever the configured `package_pattern` declares — not assume
+# a literal `-project-` substring — so the display-map lookup
+# fires for any subpackage convention an adopter chooses.
+
+[product]
+vendor = "Apache Software Foundation"
+default_product = "Apache Example"
+default_package_name = "apache-example"
+default_collection_url = "https://pypi.python.org"
+
+[cna]
+org_id = "f0158376-9dc2-43b6-827c-5f631a4d8d09"
+
+[cna_private]
+project_url = "https://example.apache.org/"
+owner = "example"
+users_list = "[email protected]"
+
+[meta]
+tracker_repo = "apache-example-s/apache-example-s"
+generator_tag = "apache-example-s/generate_cve_json.py"
+
+[packages]
+# Subpackages live under `apache-example-providers-<dir>` here, not
+# under `-project-<dir>`. The named `project` group is still required
+# and consumed the same way by the generator.
+package_pattern = '^(?P<package>apache-example(?:-providers-(?P<project>[a-z0-9][a-z0-9_-]*))?)(?:\s+(?P<rest>.*))?$'
+
+top_level_name = "apache-example"
+top_level_product = "Apache Example"
+project_product_template = "Apache Example {display} provider"
+
+[packages.project_display_map]
+"cncf-kubernetes" = "CNCF Kubernetes"
+"amazon" = "Amazon"
+"apache-spark" = "Apache Spark"
diff --git a/tools/vulnogram/generate-cve-json/tests/test_generate_cve_json.py b/tools/vulnogram/generate-cve-json/tests/test_generate_cve_json.py
index 55809ab..6a4c287 100644
--- a/tools/vulnogram/generate-cve-json/tests/test_generate_cve_json.py
+++ b/tools/vulnogram/generate-cve-json/tests/test_generate_cve_json.py
@@ -25,8 +25,11 @@ CLI end-to-end.
 
 from __future__ import annotations
 
+from collections.abc import Iterator
 from typing import Any
 
+import pytest
+
 from generate_cve_json import (
     _build_attachment_body,
     _is_cna_ready_for_review,
@@ -329,6 +332,70 @@ class TestProductForPackage:
         )
 
 
+# ---------------------------------------------------------------------------
+# Product-name resolution for projects whose subpackages use a
+# convention other than `-project-` (regression coverage for the bug
+# where the lookup was anchored on a hardcoded `-project-` prefix
+# instead of reading the `project` group from the configured
+# `package_pattern`).
+# ---------------------------------------------------------------------------
+
+
+class TestProductForPackageProvidersStyle:
+    """Subpackage prefix is `-providers-`, not `-project-`.
+
+    Mirrors the shape Apache Airflow ships on PyPI
+    (`apache-airflow-providers-<dir>`). The lookup must read the
+    `project` named group from whatever the configured
+    `package_pattern` declares — the prefix is a project-level
+    convention, not a generator constant.
+    """
+
+    @pytest.fixture(autouse=True)
+    def _providers_config(self) -> Iterator[None]:
+        from pathlib import Path
+
+        from generate_cve_json.cve_json import _set_config_path
+
+        fixture = Path(__file__).resolve().parent / "fixtures" / "cve-json-config-providers.toml"
+        default = Path(__file__).resolve().parent / "fixtures" / "cve-json-config.toml"
+        _set_config_path(fixture)
+        try:
+            yield
+        finally:
+            _set_config_path(default)
+
+    def test_known_provider_uses_display_map_casing(self):
+        # `cncf-kubernetes` is captured by the `(?P<project>...)`
+        # group of the configured pattern and looked up in the
+        # display map — the resolver must not fall through to
+        # returning the raw package name.
+        assert (
+            _product_for_package("apache-example-providers-cncf-kubernetes")
+            == "Apache Example CNCF Kubernetes provider"
+        )
+
+    def test_unknown_provider_falls_back_to_title_case(self):
+        assert (
+            _product_for_package("apache-example-providers-madeup-widget")
+            == "Apache Example Madeup Widget provider"
+        )
+
+    def test_top_level_still_resolves(self):
+        assert _product_for_package("apache-example") == "Apache Example"
+
+    def test_overrides_still_win(self):
+        assert (
+            _product_for_package(
+                "apache-example-providers-cncf-kubernetes",
+                product_overrides={
+                    "apache-example-providers-cncf-kubernetes": "CUSTOM",
+                },
+            )
+            == "CUSTOM"
+        )
+
+
 # ---------------------------------------------------------------------------
 # Multi-product `build_affected`
 # ---------------------------------------------------------------------------

Reply via email to