From: Stefano Tondo <[email protected]>
Add version extraction, PURL generation, and external references
to source download packages in SPDX 3.0 SBOMs:
- Extract version from SRCREV for Git sources (full SHA-1)
- Generate PURLs for Git sources on github.com by default
- Support custom mappings via SPDX_GIT_PURL_MAPPINGS variable
(format: "domain:purl_type"; each entry is split on the first ':'
only, so the PURL type may itself contain a colon, e.g.
"gitlab.example.com:pkg:gitlab")
- Use the first non-pkg:yocto PURL from SPDX_PACKAGE_URLS for non-Git sources
- Add VCS external references for Git downloads
- Add distribution external references for tarball downloads
- Parse Git URLs using urllib.parse
- Extract logic into _generate_git_purl() and
_enrich_source_package() helpers
The SPDX_GIT_PURL_MAPPINGS variable allows configuring PURL
generation for self-hosted Git services (e.g., GitLab).
github.com is mapped to pkg:github by default; an explicit
github.com entry in SPDX_GIT_PURL_MAPPINGS overrides it.
Signed-off-by: Stefano Tondo <[email protected]>
---
meta/classes/create-spdx-3.0.bbclass | 7 ++
meta/lib/oe/spdx30_tasks.py | 122 +++++++++++++++++++++++++++
2 files changed, 129 insertions(+)
diff --git a/meta/classes/create-spdx-3.0.bbclass b/meta/classes/create-spdx-3.0.bbclass
index def2dacbc3..9e912b34e1 100644
--- a/meta/classes/create-spdx-3.0.bbclass
+++ b/meta/classes/create-spdx-3.0.bbclass
@@ -152,6 +152,13 @@ SPDX_PACKAGE_URLS[doc] = "A space separated list of Package URLs (purls) for \
Override this variable to replace the default, otherwise append or prepend \
to add additional purls."
+SPDX_GIT_PURL_MAPPINGS ??= ""
+SPDX_GIT_PURL_MAPPINGS[doc] = "A space separated list of domain:purl_type \
+    mappings to configure PURL generation for Git source downloads. \
+    For example, 'gitlab.example.com:pkg:gitlab' maps repositories hosted \
+    on gitlab.example.com to the pkg:gitlab PURL type. \
+    github.com is mapped to pkg:github unless a github.com entry overrides it."
+
IMAGE_CLASSES:append = " create-spdx-image-3.0"
SDK_CLASSES += "create-spdx-sdk-3.0"
diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
index c3a23d7889..1f6c84628d 100644
--- a/meta/lib/oe/spdx30_tasks.py
+++ b/meta/lib/oe/spdx30_tasks.py
@@ -13,6 +13,7 @@ import oe.spdx30
import oe.spdx_common
import oe.sdk
import os
+import urllib.parse
from contextlib import contextmanager
from datetime import datetime, timezone
@@ -377,6 +378,125 @@ def collect_dep_sources(dep_objsets, dest):
index_sources_by_hash(e.to, dest)
+def _generate_git_purl(d, download_location, srcrev):
+    """Generate a Package URL for a Git source from its download location.
+
+    The hostname of the URL following the 'git+' prefix is looked up in a
+    table seeded with github.com -> pkg:github and extended or overridden
+    by SPDX_GIT_PURL_MAPPINGS entries ("domain:purl_type", space separated).
+
+    Returns "<purl_type>/<owner>/<repo>@<srcrev>", or None when the location
+    is not a 'git+' URL, the host is unmapped, or the path does not contain
+    both an owner and a repository component.
+    """
+    if not download_location or not download_location.startswith('git+'):
+        return None
+
+    # Default handler: github.com
+    git_purl_handlers = {'github.com': 'pkg:github'}
+
+    # Split on the first ':' only, because the PURL type itself contains
+    # one ("gitlab.example.com:pkg:gitlab").
+    for mapping in (d.getVar('SPDX_GIT_PURL_MAPPINGS') or '').split():
+        domain, sep, purl_type = mapping.partition(':')
+        if sep and domain and purl_type:
+            git_purl_handlers[domain] = purl_type
+            bb.debug(2, f"Added custom Git PURL mapping: {domain} -> {purl_type}")
+        else:
+            bb.warn(f"Invalid SPDX_GIT_PURL_MAPPINGS entry: {mapping} (expected format: domain:purl_type)")
+
+    try:
+        parsed = urllib.parse.urlparse(download_location[4:])
+        hostname = parsed.hostname
+    except ValueError:
+        return None
+
+    purl_type = git_purl_handlers.get(hostname)
+    if not purl_type:
+        return None
+
+    # The download location may carry a trailing "@<revision>"; drop it so
+    # the revision is not duplicated inside the repository name.
+    path_parts = parsed.path.partition('@')[0].strip('/').split('/')
+    if len(path_parts) < 2:
+        return None
+
+    owner, repo = path_parts[0], path_parts[1]
+    # Remove only a trailing ".git"; str.replace() would also mangle names
+    # that merely contain ".git" (e.g. "my.github.io").
+    if repo.endswith('.git'):
+        repo = repo[:-len('.git')]
+    if not owner or not repo:
+        return None
+    return f"{purl_type}/{owner}/{repo}@{srcrev}"
+
+
+def _enrich_source_package(d, dl, fd, file_name, primary_purpose):
+    """Enrich a source download package with version, PURL, and external refs.
+
+    Git sources take their version from fd.revision and their PURL from
+    _generate_git_purl(); other sources take PV and the first
+    SPDX_PACKAGE_URLS entry that is not a pkg:yocto PURL. A vcs or
+    altDownloadLocation external reference is then added from the download
+    location. file_name and primary_purpose are currently unused.
+    """
+    version = None
+    purl = None
+    download_location = getattr(dl, 'software_downloadLocation', None)
+
+    if fd.type == "git":
+        # Use full SHA-1 from fd.revision, skipping unresolved placeholders
+        srcrev = getattr(fd, 'revision', None)
+        if srcrev and srcrev not in {'${AUTOREV}', 'AUTOINC', 'INVALID'}:
+            version = srcrev
+
+        # Generate PURL for Git hosting services
+        if version and download_location:
+            purl = _generate_git_purl(d, download_location, version)
+    else:
+        # For non-Git sources, use recipe PV as version
+        pv = d.getVar('PV')
+        if pv and pv not in {'git', 'AUTOINC', 'INVALID', '${PV}'}:
+            version = pv
+
+        # Use ecosystem PURL from SPDX_PACKAGE_URLS if available
+        package_urls = (d.getVar('SPDX_PACKAGE_URLS') or '').split()
+        purl = next(
+            (u for u in package_urls if not u.startswith('pkg:yocto')), None
+        )
+
+    if version:
+        dl.software_packageVersion = version
+
+    if purl:
+        dl.software_packageUrl = purl
+
+    # Add external references
+    if not download_location or not isinstance(download_location, str):
+        return
+    dl.externalRef = dl.externalRef or []
+
+    if download_location.startswith('git+'):
+        # VCS reference for Git repositories
+        ref_type = oe.spdx30.ExternalRefType.vcs
+        locator = download_location[4:]
+        # Drop a trailing "@<revision>" only. Splitting on the first '@'
+        # would truncate URLs carrying userinfo ("ssh://git@host/repo").
+        head, sep, tail = locator.rpartition('@')
+        if sep and '/' not in tail:
+            locator = head
+    elif download_location.startswith(('http://', 'https://', 'ftp://')):
+        # Distribution reference for tarball/archive downloads
+        ref_type = oe.spdx30.ExternalRefType.altDownloadLocation
+        locator = download_location
+    else:
+        return
+
+    dl.externalRef.append(
+        oe.spdx30.ExternalRef(externalRefType=ref_type, locator=[locator])
+    )
+
+
def add_download_files(d, objset):
inputs = set()
@@ -440,6 +560,8 @@ def add_download_files(d, objset):
)
)
+ _enrich_source_package(d, dl, fd, file_name, primary_purpose)
+
if fd.method.supports_checksum(fd):
# TODO Need something better than hard coding this
for checksum_id in ["sha256", "sha1"]:
--
2.53.0
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#232713):
https://lists.openembedded.org/g/openembedded-core/message/232713
Mute This Topic: https://lists.openembedded.org/mt/118221139/21656
Group Owner: [email protected]
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-