This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-steward.git
The following commit(s) were added to refs/heads/main by this push:
new 9b56ffd generate-cve-json: derive project_dir from package_pattern regex (#161)
9b56ffd is described below
commit 9b56ffdee4778b04e5318540d499b06d6bcb4971
Author: Jarek Potiuk <[email protected]>
AuthorDate: Fri May 15 03:33:48 2026 +0200
generate-cve-json: derive project_dir from package_pattern regex (#161)
`_product_for_package` was anchored on a hardcoded
`PROJECT_PREFIX = f"{TOP_LEVEL_NAME}-project-"`, which only matches
projects whose subpackages happen to live under `-project-<dir>`.
Projects that ship subpackages under a different prefix —
`-providers-<dir>` is the realistic example shipped by Apache
Airflow on PyPI — saw the prefix check fail and fell through to
returning the raw PyPI package name as `product`. The project's
`project_display_map` and `project_product_template` were silently
ignored even when the corresponding `<project>` group was correctly
captured by `package_pattern`.
The fix uses the `project` named group from the configured
`package_pattern` regex instead of slicing on a hardcoded prefix.
The prefix is now a project-level convention encoded in the regex,
not a generator constant. Adopters can use whatever subpackage
prefix matches their PyPI taxonomy (`-project-`, `-providers-`, …)
and the display-map lookup fires for any of them.
`PROJECT_PREFIX` is dropped entirely; nothing else in the codebase
read it.
Adds a regression fixture (`cve-json-config-providers.toml`) and a
`TestProductForPackageProvidersStyle` class exercising a
`-providers-<dir>` taxonomy, so a future revert of this fix would
fail loudly.
Generated-by: Claude Code (Claude Opus 4.7)
---
.../src/generate_cve_json/cve_json.py | 31 +++++-----
.../tests/fixtures/cve-json-config-providers.toml | 60 +++++++++++++++++++
.../tests/test_generate_cve_json.py | 67 ++++++++++++++++++++++
3 files changed, 143 insertions(+), 15 deletions(-)
diff --git a/tools/vulnogram/generate-cve-json/src/generate_cve_json/cve_json.py b/tools/vulnogram/generate-cve-json/src/generate_cve_json/cve_json.py
index 339a644..79a57de 100644
--- a/tools/vulnogram/generate-cve-json/src/generate_cve_json/cve_json.py
+++ b/tools/vulnogram/generate-cve-json/src/generate_cve_json/cve_json.py
@@ -177,7 +177,6 @@ def _populate_constants() -> None:
global DEFAULT_ASF_ORG_ID, GENERATOR_TAG, SKILL_SOURCE_URL
global PROJECT_DISPLAY_MAP, PACKAGE_RE
global TOP_LEVEL_NAME, TOP_LEVEL_PRODUCT, PROJECT_PRODUCT_TEMPLATE
- global PROJECT_PREFIX
global CNA_PRIVATE_PROJECT_URL, CNA_PRIVATE_OWNER, CNA_PRIVATE_USERS_LIST
global TITLE_STRIP_RE, TRACKER_FILTER_TOKEN
@@ -196,9 +195,6 @@ def _populate_constants() -> None:
TOP_LEVEL_NAME = cfg["packages"]["top_level_name"]
TOP_LEVEL_PRODUCT = cfg["packages"]["top_level_product"]
PROJECT_PRODUCT_TEMPLATE = cfg["packages"]["project_product_template"]
- # Convenient derived constant: the prefix used to detect
- # "project" subpackages (e.g. ``<top-level>-project-``).
- PROJECT_PREFIX = f"{TOP_LEVEL_NAME}-project-"
# Per-project CVE 5.x `CNA_private` envelope fields. These end up
# in every CVE record this tool generates for the project, and
@@ -244,7 +240,6 @@ PACKAGE_RE: re.Pattern[str] = re.compile("")
TOP_LEVEL_NAME: str = ""
TOP_LEVEL_PRODUCT: str = ""
PROJECT_PRODUCT_TEMPLATE: str = ""
-PROJECT_PREFIX: str = ""
CNA_PRIVATE_PROJECT_URL: str = ""
CNA_PRIVATE_OWNER: str = ""
CNA_PRIVATE_USERS_LIST: str = ""
@@ -684,11 +679,15 @@ def _product_for_package(
* `package_name == TOP_LEVEL_NAME` → `TOP_LEVEL_PRODUCT` (e.g.
`apache-airflow` → `Apache Airflow`).
- * `package_name` matches `<TOP_LEVEL_NAME>-project-<dir>` →
+ * `package_name` matches `PACKAGE_RE` and the regex captures a
+ non-empty named `project` group →
`PROJECT_PRODUCT_TEMPLATE.format(display=...)`, where `display`
- is `PROJECT_DISPLAY_MAP[<dir>]` when the directory name is
+ is `PROJECT_DISPLAY_MAP[<project>]` when the directory name is
known, or a title-cased dash-split fallback otherwise
- (`foo-bar` → `Foo Bar`).
+ (`foo-bar` → `Foo Bar`). The `project` group encodes the
+ project's subpackage convention (`-project-<dir>`,
+ `-providers-<dir>`, …) — whatever the regex declares — so the
+ mapping works for any subpackage prefix the project ships.
* `product_overrides` lets callers shadow either source by package
name — used by the `--product-for` CLI flag for unknown
subpackages or acronyms that don't round-trip through `title()`.
@@ -705,13 +704,15 @@ def _product_for_package(
return overrides[package_name]
if package_name == TOP_LEVEL_NAME:
return TOP_LEVEL_PRODUCT
- if package_name.startswith(PROJECT_PREFIX):
- project_dir = package_name[len(PROJECT_PREFIX) :]
- if project_dir in PROJECT_DISPLAY_MAP:
- display = PROJECT_DISPLAY_MAP[project_dir]
- else:
- display = " ".join(part.title() for part in project_dir.split("-") if part)
- return PROJECT_PRODUCT_TEMPLATE.format(display=display)
+ match = PACKAGE_RE.fullmatch(package_name) or PACKAGE_RE.match(package_name)
+ if match is not None:
+ project_dir = (match.groupdict().get("project") or "").strip()
+ if project_dir:
+ if project_dir in PROJECT_DISPLAY_MAP:
+ display = PROJECT_DISPLAY_MAP[project_dir]
+ else:
+ display = " ".join(part.title() for part in project_dir.split("-") if part)
+ return PROJECT_PRODUCT_TEMPLATE.format(display=display)
return package_name
diff --git a/tools/vulnogram/generate-cve-json/tests/fixtures/cve-json-config-providers.toml b/tools/vulnogram/generate-cve-json/tests/fixtures/cve-json-config-providers.toml
new file mode 100644
index 0000000..e691eb8
--- /dev/null
+++ b/tools/vulnogram/generate-cve-json/tests/fixtures/cve-json-config-providers.toml
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# generate-cve-json — TEST FIXTURE config (regression coverage for
+# subpackage prefixes other than `-project-`).
+#
+# This fixture mirrors a project that ships subpackages under a
+# `-providers-<dir>` convention (the shape used by, for example,
+# Apache Airflow's third-party-integration packages on PyPI). The
+# `_product_for_package` lookup must read the `project` group from
+# whatever the configured `package_pattern` declares — not assume
+# a literal `-project-` substring — so the display-map lookup
+# fires for any subpackage convention an adopter chooses.
+
+[product]
+vendor = "Apache Software Foundation"
+default_product = "Apache Example"
+default_package_name = "apache-example"
+default_collection_url = "https://pypi.python.org"
+
+[cna]
+org_id = "f0158376-9dc2-43b6-827c-5f631a4d8d09"
+
+[cna_private]
+project_url = "https://example.apache.org/"
+owner = "example"
+users_list = "[email protected]"
+
+[meta]
+tracker_repo = "apache-example-s/apache-example-s"
+generator_tag = "apache-example-s/generate_cve_json.py"
+
+[packages]
+# Subpackages live under `apache-example-providers-<dir>` here, not
+# under `-project-<dir>`. The named `project` group is still required
+# and consumed the same way by the generator.
+package_pattern = '^(?P<package>apache-example(?:-providers-(?P<project>[a-z0-9][a-z0-9_-]*))?)(?:\s+(?P<rest>.*))?$'
+
+top_level_name = "apache-example"
+top_level_product = "Apache Example"
+project_product_template = "Apache Example {display} provider"
+
+[packages.project_display_map]
+"cncf-kubernetes" = "CNCF Kubernetes"
+"amazon" = "Amazon"
+"apache-spark" = "Apache Spark"
diff --git a/tools/vulnogram/generate-cve-json/tests/test_generate_cve_json.py b/tools/vulnogram/generate-cve-json/tests/test_generate_cve_json.py
index 55809ab..6a4c287 100644
--- a/tools/vulnogram/generate-cve-json/tests/test_generate_cve_json.py
+++ b/tools/vulnogram/generate-cve-json/tests/test_generate_cve_json.py
@@ -25,8 +25,11 @@ CLI end-to-end.
from __future__ import annotations
+from collections.abc import Iterator
from typing import Any
+import pytest
+
from generate_cve_json import (
_build_attachment_body,
_is_cna_ready_for_review,
@@ -329,6 +332,70 @@ class TestProductForPackage:
)
+# ---------------------------------------------------------------------------
+# Product-name resolution for projects whose subpackages use a
+# convention other than `-project-` (regression coverage for the bug
+# where the lookup was anchored on a hardcoded `-project-` prefix
+# instead of reading the `project` group from the configured
+# `package_pattern`).
+# ---------------------------------------------------------------------------
+
+
+class TestProductForPackageProvidersStyle:
+ """Subpackage prefix is `-providers-`, not `-project-`.
+
+ Mirrors the shape Apache Airflow ships on PyPI
+ (`apache-airflow-providers-<dir>`). The lookup must read the
+ `project` named group from whatever the configured
+ `package_pattern` declares — the prefix is a project-level
+ convention, not a generator constant.
+ """
+
+ @pytest.fixture(autouse=True)
+ def _providers_config(self) -> Iterator[None]:
+ from pathlib import Path
+
+ from generate_cve_json.cve_json import _set_config_path
+
+ fixture = Path(__file__).resolve().parent / "fixtures" / "cve-json-config-providers.toml"
+ default = Path(__file__).resolve().parent / "fixtures" / "cve-json-config.toml"
+ _set_config_path(fixture)
+ try:
+ yield
+ finally:
+ _set_config_path(default)
+
+ def test_known_provider_uses_display_map_casing(self):
+ # `cncf-kubernetes` is captured by the `(?P<project>...)`
+ # group of the configured pattern and looked up in the
+ # display map — the resolver must not fall through to
+ # returning the raw package name.
+ assert (
+ _product_for_package("apache-example-providers-cncf-kubernetes")
+ == "Apache Example CNCF Kubernetes provider"
+ )
+
+ def test_unknown_provider_falls_back_to_title_case(self):
+ assert (
+ _product_for_package("apache-example-providers-madeup-widget")
+ == "Apache Example Madeup Widget provider"
+ )
+
+ def test_top_level_still_resolves(self):
+ assert _product_for_package("apache-example") == "Apache Example"
+
+ def test_overrides_still_win(self):
+ assert (
+ _product_for_package(
+ "apache-example-providers-cncf-kubernetes",
+ product_overrides={
+ "apache-example-providers-cncf-kubernetes": "CUSTOM",
+ },
+ )
+ == "CUSTOM"
+ )
+
+
# ---------------------------------------------------------------------------
# Multi-product `build_affected`
# ---------------------------------------------------------------------------